diff options
author | トトも <85485984+ElectronicsArchiver@users.noreply.github.com> | 2022-04-16 20:18:56 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-16 14:18:56 -0400 |
commit | 33cc3b9dd7bf3dae1c6cf86e46bb4923f96e7fff (patch) | |
tree | 02fb808d19aee560ac9d381ca5a52509881cdd44 /src | |
parent | 8f5a73a6585bd425807430fd80ce1e3a737f4c5f (diff) | |
download | bfs-33cc3b9dd7bf3dae1c6cf86e46bb4923f96e7fff.tar.xz |
Source / Include Folder (#88)
Moved Source Files Into `src` Folder
Diffstat (limited to 'src')
-rw-r--r-- | src/bar.c | 248 | ||||
-rw-r--r-- | src/bar.h | 57 | ||||
-rw-r--r-- | src/bfs.h | 32 | ||||
-rw-r--r-- | src/bftw.c | 1494 | ||||
-rw-r--r-- | src/bftw.h | 223 | ||||
-rw-r--r-- | src/color.c | 1125 | ||||
-rw-r--r-- | src/color.h | 120 | ||||
-rw-r--r-- | src/ctx.c | 311 | ||||
-rw-r--r-- | src/ctx.h | 212 | ||||
-rw-r--r-- | src/darray.c | 103 | ||||
-rw-r--r-- | src/darray.h | 110 | ||||
-rw-r--r-- | src/diag.c | 233 | ||||
-rw-r--r-- | src/diag.h | 108 | ||||
-rw-r--r-- | src/dir.c | 303 | ||||
-rw-r--r-- | src/dir.h | 124 | ||||
-rw-r--r-- | src/dstring.c | 220 | ||||
-rw-r--r-- | src/dstring.h | 194 | ||||
-rw-r--r-- | src/eval.c | 1644 | ||||
-rw-r--r-- | src/eval.h | 113 | ||||
-rw-r--r-- | src/exec.c | 715 | ||||
-rw-r--r-- | src/exec.h | 121 | ||||
-rw-r--r-- | src/expr.h | 235 | ||||
-rw-r--r-- | src/fsade.c | 392 | ||||
-rw-r--r-- | src/fsade.h | 83 | ||||
-rw-r--r-- | src/main.c | 141 | ||||
-rw-r--r-- | src/mtab.c | 246 | ||||
-rw-r--r-- | src/mtab.h | 71 | ||||
-rw-r--r-- | src/opt.c | 1088 | ||||
-rw-r--r-- | src/opt.h | 37 | ||||
-rw-r--r-- | src/parse.c | 3959 | ||||
-rw-r--r-- | src/parse.h | 36 | ||||
-rw-r--r-- | src/printf.c | 927 | ||||
-rw-r--r-- | src/printf.h | 68 | ||||
-rw-r--r-- | src/pwcache.c | 293 | ||||
-rw-r--r-- | src/pwcache.h | 117 | ||||
-rw-r--r-- | src/stat.c | 376 | ||||
-rw-r--r-- | src/stat.h | 155 | ||||
-rw-r--r-- | src/trie.c | 693 | ||||
-rw-r--r-- | src/trie.h | 156 | ||||
-rw-r--r-- | src/typo.c | 176 | ||||
-rw-r--r-- | src/typo.h | 31 | ||||
-rw-r--r-- | src/util.c | 510 | ||||
-rw-r--r-- | src/util.h | 317 | ||||
-rw-r--r-- | src/xregex.c | 301 | ||||
-rw-r--r-- | src/xregex.h | 97 | ||||
-rw-r--r-- | src/xspawn.c | 318 | ||||
-rw-r--r-- | src/xspawn.h | 123 | ||||
-rw-r--r-- | src/xtime.c | 323 | ||||
-rw-r--r-- | src/xtime.h | 86 |
49 files changed, 19165 insertions, 0 deletions
diff --git a/src/bar.c b/src/bar.c new file mode 100644 index 0000000..b0e595e --- /dev/null +++ b/src/bar.c @@ -0,0 +1,248 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2020-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "bar.h" +#include "dstring.h" +#include "util.h" +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <sys/ioctl.h> +#include <unistd.h> + +struct bfs_bar { + int fd; + volatile sig_atomic_t width; + volatile sig_atomic_t height; +}; + +/** The global status bar instance. */ +static struct bfs_bar the_bar = { + .fd = -1, +}; + +/** Get the terminal size, if possible. */ +static int bfs_bar_getsize(struct bfs_bar *bar) { +#ifdef TIOCGWINSZ + struct winsize ws; + if (ioctl(bar->fd, TIOCGWINSZ, &ws) != 0) { + return -1; + } + + bar->width = ws.ws_col; + bar->height = ws.ws_row; + return 0; +#else + errno = ENOTSUP; + return -1; +#endif +} + +/** Async Signal Safe puts(). */ +static int ass_puts(int fd, const char *str) { + size_t len = strlen(str); + return xwrite(fd, str, len) == len ? 0 : -1; +} + +/** Number of decimal digits needed for terminal sizes. */ +#define ITOA_DIGITS ((sizeof(unsigned short) * CHAR_BIT + 2) / 3) + +/** Async Signal Safe itoa(). */ +static char *ass_itoa(char *str, unsigned int n) { + char *end = str + ITOA_DIGITS; + *end = '\0'; + + char *c = end; + do { + *--c = '0' + (n % 10); + n /= 10; + } while (n); + + size_t len = end - c; + memmove(str, c, len + 1); + return str + len; +} + +/** Update the size of the scrollable region. */ +static int bfs_bar_resize(struct bfs_bar *bar) { + char esc_seq[12 + ITOA_DIGITS] = + "\0337" // DECSC: Save cursor + "\033[;"; // DECSTBM: Set scrollable region + + // DECSTBM takes the height as the second argument + char *ptr = esc_seq + strlen(esc_seq); + ptr = ass_itoa(ptr, bar->height - 1); + + strcpy(ptr, + "r" // DECSTBM + "\0338" // DECRC: Restore the cursor + "\033[J" // ED: Erase display from cursor to end + ); + + return ass_puts(bar->fd, esc_seq); +} + +#ifdef SIGWINCH +/** SIGWINCH handler. */ +static void sighand_winch(int sig) { + int error = errno; + + bfs_bar_getsize(&the_bar); + bfs_bar_resize(&the_bar); + + errno = error; +} +#endif + +/** Reset the scrollable region and hide the bar. */ +static int bfs_bar_reset(struct bfs_bar *bar) { + return ass_puts(bar->fd, + "\0337" // DECSC: Save cursor + "\033[r" // DECSTBM: Reset scrollable region + "\0338" // DECRC: Restore cursor + "\033[J" // ED: Erase display from cursor to end + ); +} + +/** Signal handler for process-terminating signals. */ +static void sighand_reset(int sig) { + bfs_bar_reset(&the_bar); + raise(sig); +} + +/** Register sighand_reset() for a signal. */ +static void reset_before_death_by(int sig) { + struct sigaction sa = { + .sa_handler = sighand_reset, + .sa_flags = SA_RESETHAND, + }; + sigemptyset(&sa.sa_mask); + sigaction(sig, &sa, NULL); +} + +/** printf() to the status bar with a single write(). */ +BFS_FORMATTER(2, 3) +static int bfs_bar_printf(struct bfs_bar *bar, const char *format, ...) { + va_list args; + va_start(args, format); + char *str = dstrvprintf(format, args); + va_end(args); + + if (!str) { + return -1; + } + + int ret = ass_puts(bar->fd, str); + dstrfree(str); + return ret; +} + +struct bfs_bar *bfs_bar_show(void) { + if (the_bar.fd >= 0) { + errno = EBUSY; + goto fail; + } + + char term[L_ctermid]; + ctermid(term); + if (strlen(term) == 0) { + errno = ENOTTY; + goto fail; + } + + the_bar.fd = open(term, O_RDWR | O_CLOEXEC); + if (the_bar.fd < 0) { + goto fail; + } + + if (bfs_bar_getsize(&the_bar) != 0) { + goto fail_close; + } + + reset_before_death_by(SIGABRT); + reset_before_death_by(SIGINT); + reset_before_death_by(SIGPIPE); + reset_before_death_by(SIGQUIT); + reset_before_death_by(SIGTERM); + +#ifdef SIGWINCH + struct sigaction sa = { + .sa_handler = sighand_winch, + .sa_flags = SA_RESTART, + }; + sigemptyset(&sa.sa_mask); + sigaction(SIGWINCH, &sa, NULL); +#endif + + bfs_bar_printf(&the_bar, + "\n" // Make space for the bar + "\0337" // DECSC: Save cursor + "\033[;%ur" // DECSTBM: Set scrollable region + "\0338" // DECRC: Restore cursor + "\033[1A", // CUU: Move cursor up 1 row + (unsigned int)(the_bar.height - 1) + ); + + return &the_bar; + +fail_close: + close_quietly(the_bar.fd); + the_bar.fd = -1; +fail: + return NULL; +} + +unsigned int bfs_bar_width(const struct bfs_bar *bar) { + return bar->width; +} + +int bfs_bar_update(struct bfs_bar *bar, const char *str) { + return bfs_bar_printf(bar, + "\0337" // DECSC: Save cursor + "\033[%u;0f" // HVP: Move cursor to row, column + "\033[K" // EL: Erase line + "\033[7m" // SGR reverse video + "%s" + "\033[27m" // SGR reverse video off + "\0338", // DECRC: Restore cursor + (unsigned int)bar->height, + str + ); +} + +void bfs_bar_hide(struct bfs_bar *bar) { + if (!bar) { + return; + } + + signal(SIGABRT, SIG_DFL); + signal(SIGINT, SIG_DFL); + signal(SIGPIPE, SIG_DFL); + signal(SIGQUIT, SIG_DFL); + signal(SIGTERM, SIG_DFL); +#ifdef SIGWINCH + signal(SIGWINCH, SIG_DFL); +#endif + + bfs_bar_reset(bar); + + xclose(bar->fd); + bar->fd = -1; +} diff --git a/src/bar.h b/src/bar.h new file mode 100644 index 0000000..3e509d6 --- /dev/null +++ b/src/bar.h @@ -0,0 +1,57 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * A terminal status bar. + */ + +#ifndef BFS_BAR_H +#define BFS_BAR_H + +/** A terminal status bar. */ +struct bfs_bar; + +/** + * Create a terminal status bar. Only one status bar is supported at a time. + * + * @return + * A pointer to the new status bar, or NULL on failure. + */ +struct bfs_bar *bfs_bar_show(void); + +/** + * Get the width of the status bar. + */ +unsigned int bfs_bar_width(const struct bfs_bar *bar); + +/** + * Update the status bar message. + * + * @param bar + * The status bar to update. + * @param str + * The string to display. + * @return + * 0 on success, -1 on failure. + */ +int bfs_bar_update(struct bfs_bar *bar, const char *str); + +/** + * Hide the status bar. + */ +void bfs_bar_hide(struct bfs_bar *status); + +#endif // BFS_BAR_H diff --git a/src/bfs.h b/src/bfs.h new file mode 100644 index 0000000..93d8d79 --- /dev/null +++ b/src/bfs.h @@ -0,0 +1,32 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2021 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Constants about the bfs program itself. + */ + +#ifndef BFS_H +#define BFS_H + +#ifndef BFS_VERSION +# define BFS_VERSION "2.5" +#endif + +#ifndef BFS_HOMEPAGE +# define BFS_HOMEPAGE "https://tavianator.com/projects/bfs.html" +#endif + +#endif // BFS_H diff --git a/src/bftw.c b/src/bftw.c new file mode 100644 index 0000000..6f97bf6 --- /dev/null +++ b/src/bftw.c @@ -0,0 +1,1494 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * The bftw() implementation consists of the following components: + * + * - struct bftw_file: A file that has been encountered during the traversal. + * They have reference-counted links to their parents in the directory tree. + * + * - struct bftw_cache: An LRU list of bftw_file's with open file descriptors, + * used for openat() to minimize the amount of path re-traversals. + * + * - struct bftw_queue: The queue of bftw_file's left to explore. Implemented + * as a simple circular buffer. + * + * - struct bftw_state: Represents the current state of the traversal, allowing + * various helper functions to take fewer parameters. + */ + +#include "bftw.h" +#include "dir.h" +#include "darray.h" +#include "dstring.h" +#include "mtab.h" +#include "stat.h" +#include "trie.h" +#include "util.h" +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +/** + * A file. + */ +struct bftw_file { + /** The parent directory, if any. */ + struct bftw_file *parent; + /** The root under which this file was found. */ + struct bftw_file *root; + /** The next file in the queue, if any. */ + struct bftw_file *next; + + /** The previous file in the LRU list. */ + struct bftw_file *lru_prev; + /** The next file in the LRU list. */ + struct bftw_file *lru_next; + + /** This file's depth in the walk. */ + size_t depth; + /** Reference count. */ + size_t refcount; + + /** An open descriptor to this file, or -1. */ + int fd; + + /** This file's type, if known. */ + enum bfs_type type; + /** The device number, for cycle detection. */ + dev_t dev; + /** The inode number, for cycle detection. */ + ino_t ino; + + /** The offset of this file in the full path. */ + size_t nameoff; + /** The length of the file's name. */ + size_t namelen; + /** The file's name. */ + char name[]; +}; + +/** + * A cache of open directories. + */ +struct bftw_cache { + /** The head of the LRU list. */ + struct bftw_file *head; + /** The insertion target for the LRU list. */ + struct bftw_file *target; + /** The tail of the LRU list. */ + struct bftw_file *tail; + /** The remaining capacity of the LRU list. */ + size_t capacity; +}; + +/** Initialize a cache. */ +static void bftw_cache_init(struct bftw_cache *cache, size_t capacity) { + cache->head = NULL; + cache->target = NULL; + cache->tail = NULL; + cache->capacity = capacity; +} + +/** Destroy a cache. */ +static void bftw_cache_destroy(struct bftw_cache *cache) { + assert(!cache->tail); + assert(!cache->target); + assert(!cache->head); +} + +/** Add a bftw_file to the cache. */ +static void bftw_cache_add(struct bftw_cache *cache, struct bftw_file *file) { + assert(cache->capacity > 0); + assert(file->fd >= 0); + assert(!file->lru_prev); + assert(!file->lru_next); + + if (cache->target) { + file->lru_prev = cache->target; + file->lru_next = cache->target->lru_next; + } else { + file->lru_next = cache->head; + } + + if (file->lru_prev) { + file->lru_prev->lru_next = file; + } else { + cache->head = file; + } + + if (file->lru_next) { + file->lru_next->lru_prev = file; + } else { + cache->tail = file; + } + + // Prefer to keep the root paths open by keeping them at the head of the list + if (file->depth == 0) { + cache->target = file; + } + + --cache->capacity; +} + +/** Remove a bftw_file from the cache. */ +static void bftw_cache_remove(struct bftw_cache *cache, struct bftw_file *file) { + if (cache->target == file) { + cache->target = file->lru_prev; + } + + if (file->lru_prev) { + assert(cache->head != file); + file->lru_prev->lru_next = file->lru_next; + } else { + assert(cache->head == file); + cache->head = file->lru_next; + } + + if (file->lru_next) { + assert(cache->tail != file); + file->lru_next->lru_prev = file->lru_prev; + } else { + assert(cache->tail == file); + cache->tail = file->lru_prev; + } + + file->lru_prev = NULL; + file->lru_next = NULL; + ++cache->capacity; +} + +/** Mark a cache entry as recently used. */ +static void bftw_cache_use(struct bftw_cache *cache, struct bftw_file *file) { + bftw_cache_remove(cache, file); + bftw_cache_add(cache, file); +} + +/** Close a bftw_file. */ +static void bftw_file_close(struct bftw_cache *cache, struct bftw_file *file) { + assert(file->fd >= 0); + + bftw_cache_remove(cache, file); + + xclose(file->fd); + file->fd = -1; +} + +/** Pop a directory from the cache. */ +static void bftw_cache_pop(struct bftw_cache *cache) { + assert(cache->tail); + bftw_file_close(cache, cache->tail); +} + +/** + * Shrink the cache, to recover from EMFILE. + * + * @param cache + * The cache in question. + * @param saved + * A bftw_file that must be preserved. + * @return + * 0 if successfully shrunk, otherwise -1. + */ +static int bftw_cache_shrink(struct bftw_cache *cache, const struct bftw_file *saved) { + struct bftw_file *file = cache->tail; + if (!file) { + return -1; + } + + if (file == saved) { + file = file->lru_prev; + if (!file) { + return -1; + } + } + + bftw_file_close(cache, file); + cache->capacity = 0; + return 0; +} + +/** Compute the name offset of a child path. */ +static size_t bftw_child_nameoff(const struct bftw_file *parent) { + size_t ret = parent->nameoff + parent->namelen; + if (parent->name[parent->namelen - 1] != '/') { + ++ret; + } + return ret; +} + +/** Create a new bftw_file. */ +static struct bftw_file *bftw_file_new(struct bftw_file *parent, const char *name) { + size_t namelen = strlen(name); + size_t size = BFS_FLEX_SIZEOF(struct bftw_file, name, namelen + 1); + + struct bftw_file *file = malloc(size); + if (!file) { + return NULL; + } + + file->parent = parent; + + if (parent) { + file->root = parent->root; + file->depth = parent->depth + 1; + file->nameoff = bftw_child_nameoff(parent); + ++parent->refcount; + } else { + file->root = file; + file->depth = 0; + file->nameoff = 0; + } + + file->next = NULL; + + file->lru_prev = NULL; + file->lru_next = NULL; + + file->refcount = 1; + file->fd = -1; + + file->type = BFS_UNKNOWN; + file->dev = -1; + file->ino = -1; + + file->namelen = namelen; + memcpy(file->name, name, namelen + 1); + + return file; +} + +/** + * Open a bftw_file relative to another one. + * + * @param cache + * The cache to hold the file. + * @param file + * The file to open. + * @param base + * The base directory for the relative path (may be NULL). + * @param at_fd + * The base file descriptor, AT_FDCWD if base == NULL. + * @param at_path + * The relative path to the file. + * @return + * The opened file descriptor, or negative on error. + */ +static int bftw_file_openat(struct bftw_cache *cache, struct bftw_file *file, struct bftw_file *base, const char *at_path) { + assert(file->fd < 0); + + int at_fd = AT_FDCWD; + if (base) { + bftw_cache_use(cache, base); + at_fd = base->fd; + } + + int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY; + int fd = openat(at_fd, at_path, flags); + + if (fd < 0 && errno == EMFILE) { + if (bftw_cache_shrink(cache, base) == 0) { + fd = openat(at_fd, at_path, flags); + } + } + + if (fd >= 0) { + if (cache->capacity == 0) { + bftw_cache_pop(cache); + } + + file->fd = fd; + bftw_cache_add(cache, file); + } + + return fd; +} + +/** + * Open a bftw_file. + * + * @param cache + * The cache to hold the file. + * @param file + * The file to open. + * @param path + * The full path to the file. + * @return + * The opened file descriptor, or negative on error. + */ +static int bftw_file_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { + // Find the nearest open ancestor + struct bftw_file *base = file; + do { + base = base->parent; + } while (base && base->fd < 0); + + const char *at_path = path; + if (base) { + at_path += bftw_child_nameoff(base); + } + + int fd = bftw_file_openat(cache, file, base, at_path); + if (fd >= 0 || errno != ENAMETOOLONG) { + return fd; + } + + // Handle ENAMETOOLONG by manually traversing the path component-by-component + + // Use the ->next linked list to temporarily hold the reversed parent + // chain between base and file + struct bftw_file *cur; + for (cur = file; cur->parent != base; cur = cur->parent) { + cur->parent->next = cur; + } + + // Open the files in the chain one by one + for (base = cur; base; base = base->next) { + fd = bftw_file_openat(cache, base, base->parent, base->name); + if (fd < 0 || base == file) { + break; + } + } + + // Clear out the linked list + for (struct bftw_file *next = cur->next; cur != file; cur = next, next = next->next) { + cur->next = NULL; + } + + return fd; +} + +/** + * Open a bftw_file as a directory. + * + * @param cache + * The cache to hold the file. + * @param file + * The directory to open. + * @param path + * The full path to the directory. + * @return + * The opened directory, or NULL on error. + */ +static struct bfs_dir *bftw_file_opendir(struct bftw_cache *cache, struct bftw_file *file, const char *path) { + int fd = bftw_file_open(cache, file, path); + if (fd < 0) { + return NULL; + } + + return bfs_opendir(fd, NULL); +} + +/** Free a bftw_file. */ +static void bftw_file_free(struct bftw_cache *cache, struct bftw_file *file) { + assert(file->refcount == 0); + + if (file->fd >= 0) { + bftw_file_close(cache, file); + } + + free(file); +} + +/** + * A queue of bftw_file's to examine. + */ +struct bftw_queue { + /** The head of the queue. */ + struct bftw_file *head; + /** The insertion target. */ + struct bftw_file **target; +}; + +/** Initialize a bftw_queue. */ +static void bftw_queue_init(struct bftw_queue *queue) { + queue->head = NULL; + queue->target = &queue->head; +} + +/** Add a file to a bftw_queue. */ +static void bftw_queue_push(struct bftw_queue *queue, struct bftw_file *file) { + assert(file->next == NULL); + + file->next = *queue->target; + *queue->target = file; + queue->target = &file->next; +} + +/** Pop the next file from the head of the queue. */ +static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) { + struct bftw_file *file = queue->head; + queue->head = file->next; + file->next = NULL; + if (queue->target == &file->next) { + queue->target = &queue->head; + } + return file; +} + +/** The split phase of mergesort. */ +static struct bftw_file **bftw_sort_split(struct bftw_file **head, struct bftw_file **tail) { + struct bftw_file **tortoise = head, **hare = head; + + while (*hare != *tail) { + tortoise = &(*tortoise)->next; + hare = &(*hare)->next; + if (*hare != *tail) { + hare = &(*hare)->next; + } + } + + return tortoise; +} + +/** The merge phase of mergesort. */ +static struct bftw_file **bftw_sort_merge(struct bftw_file **head, struct bftw_file **mid, struct bftw_file **tail) { + struct bftw_file *left = *head, *right = *mid, *end = *tail; + *mid = NULL; + *tail = NULL; + + while (left || right) { + struct bftw_file *next; + if (left && (!right || strcoll(left->name, right->name) <= 0)) { + next = left; + left = left->next; + } else { + next = right; + right = right->next; + } + + *head = next; + head = &next->next; + } + + *head = end; + return head; +} + +/** + * Sort a (sub-)list of files. + * + * @param head + * The head of the (sub-)list to sort. + * @param tail + * The tail of the (sub-)list to sort. + * @return + * The new tail of the (sub-)list. + */ +static struct bftw_file **bftw_sort_files(struct bftw_file **head, struct bftw_file **tail) { + struct bftw_file **mid = bftw_sort_split(head, tail); + if (*mid == *head || *mid == *tail) { + return tail; + } + + mid = bftw_sort_files(head, mid); + tail = bftw_sort_files(mid, tail); + + return bftw_sort_merge(head, mid, tail); +} + +/** + * Holds the current state of the bftw() traversal. + */ +struct bftw_state { + /** bftw() callback. */ + bftw_callback *callback; + /** bftw() callback data. */ + void *ptr; + /** bftw() flags. */ + enum bftw_flags flags; + /** Search strategy. */ + enum bftw_strategy strategy; + /** The mount table. */ + const struct bfs_mtab *mtab; + + /** The appropriate errno value, if any. */ + int error; + + /** The cache of open directories. */ + struct bftw_cache cache; + /** The queue of directories left to explore. */ + struct bftw_queue queue; + /** The start of the current batch of files. */ + struct bftw_file **batch; + + /** The current path. */ + char *path; + /** The current file. */ + struct bftw_file *file; + /** The previous file. */ + struct bftw_file *previous; + + /** The currently open directory. */ + struct bfs_dir *dir; + /** The current directory entry. */ + struct bfs_dirent *de; + /** Storage for the directory entry. */ + struct bfs_dirent de_storage; + /** Any error encountered while reading the directory. */ + int direrror; + + /** Extra data about the current file. */ + struct BFTW ftwbuf; +}; + +/** + * Initialize the bftw() state. + */ +static int bftw_state_init(struct bftw_state *state, const struct bftw_args *args) { + state->callback = args->callback; + state->ptr = args->ptr; + state->flags = args->flags; + state->strategy = args->strategy; + state->mtab = args->mtab; + + state->error = 0; + + if (args->nopenfd < 1) { + errno = EMFILE; + return -1; + } + + state->path = dstralloc(0); + if (!state->path) { + return -1; + } + + bftw_cache_init(&state->cache, args->nopenfd); + bftw_queue_init(&state->queue); + state->batch = NULL; + + state->file = NULL; + state->previous = NULL; + + state->dir = NULL; + state->de = NULL; + state->direrror = 0; + + return 0; +} + +/** Cached bfs_stat(). */ +static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, struct bftw_stat *cache, enum bfs_stat_flags flags) { + if (!cache->buf) { + if (cache->error) { + errno = cache->error; + } else if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, &cache->storage) == 0) { + cache->buf = &cache->storage; + } else { + cache->error = errno; + } + } + + return cache->buf; +} + +const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + struct BFTW *mutbuf = (struct BFTW *)ftwbuf; + const struct bfs_stat *ret; + + if (flags & BFS_STAT_NOFOLLOW) { + ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); + if (ret && !S_ISLNK(ret->mode) && !mutbuf->stat_cache.buf) { + // Non-link, so share stat info + mutbuf->stat_cache.buf = ret; + } + } else { + ret = bftw_stat_impl(mutbuf, &mutbuf->stat_cache, BFS_STAT_FOLLOW); + if (!ret && (flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(errno)) { + ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); + } + } + + return ret; +} + +const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + if (flags & BFS_STAT_NOFOLLOW) { + return ftwbuf->lstat_cache.buf; + } else if (ftwbuf->stat_cache.buf) { + return ftwbuf->stat_cache.buf; + } else if ((flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(ftwbuf->stat_cache.error)) { + return ftwbuf->lstat_cache.buf; + } else { + return NULL; + } +} + +enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + if (flags & BFS_STAT_NOFOLLOW) { + if (ftwbuf->type == BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { + return ftwbuf->type; + } + } else if (flags & BFS_STAT_TRYFOLLOW) { + if (ftwbuf->type != BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW)) { + return ftwbuf->type; + } + } else { + if (ftwbuf->type != BFS_LNK) { + return ftwbuf->type; + } else if (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW) { + return BFS_ERROR; + } + } + + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, flags); + if (statbuf) { + return bfs_mode_to_type(statbuf->mode); + } else { + return BFS_ERROR; + } +} + +/** + * Update the path for the current file. + */ +static int bftw_update_path(struct bftw_state *state, const char *name) { + const struct bftw_file *file = state->file; + size_t length = file ? file->nameoff + file->namelen : 0; + + assert(dstrlen(state->path) >= length); + dstresize(&state->path, length); + + if (name) { + if (length > 0 && state->path[length - 1] != '/') { + if (dstrapp(&state->path, '/') != 0) { + return -1; + } + } + if (dstrcat(&state->path, name) != 0) { + return -1; + } + } + + return 0; +} + +/** Check if a stat() call is needed for this visit. */ +static bool bftw_need_stat(const struct bftw_state *state) { + if (state->flags & BFTW_STAT) { + return true; + } + + const struct BFTW *ftwbuf = &state->ftwbuf; + if (ftwbuf->type == BFS_UNKNOWN) { + return true; + } + + if (ftwbuf->type == BFS_LNK && !(ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { + return true; + } + + if (ftwbuf->type == BFS_DIR) { + if (state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS)) { + return true; + } +#if __linux__ + } else if (state->mtab) { + // Linux fills in d_type from the underlying inode, even when + // the directory entry is a bind mount point. In that case, we + // need to stat() to get the correct type. We don't need to + // check for directories because they can only be mounted over + // by other directories. + if (bfs_might_be_mount(state->mtab, ftwbuf->path)) { + return true; + } +#endif + } + + return false; +} + +/** Initialize bftw_stat cache. */ +static void bftw_stat_init(struct bftw_stat *cache) { + cache->buf = NULL; + cache->error = 0; +} + +/** + * Open a file if necessary. + * + * @param file + * The file to open. + * @param path + * The path to that file or one of its descendants. + * @return + * The opened file descriptor, or -1 on error. + */ +static int bftw_ensure_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { + int ret = file->fd; + + if (ret < 0) { + char *copy = strndup(path, file->nameoff + file->namelen); + if (!copy) { + return -1; + } + + ret = bftw_file_open(cache, file, copy); + free(copy); + } + + return ret; +} + +/** + * Initialize the buffers with data about the current path. + */ +static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { + struct bftw_file *file = state->file; + const struct bfs_dirent *de = state->de; + + struct BFTW *ftwbuf = &state->ftwbuf; + ftwbuf->path = state->path; + ftwbuf->root = file ? file->root->name : ftwbuf->path; + ftwbuf->depth = 0; + ftwbuf->visit = visit; + ftwbuf->type = BFS_UNKNOWN; + ftwbuf->error = state->direrror; + ftwbuf->at_fd = AT_FDCWD; + ftwbuf->at_path = ftwbuf->path; + ftwbuf->stat_flags = BFS_STAT_NOFOLLOW; + bftw_stat_init(&ftwbuf->lstat_cache); + bftw_stat_init(&ftwbuf->stat_cache); + + struct bftw_file *parent = NULL; + if (de) { + parent = file; + ftwbuf->depth = file->depth + 1; + ftwbuf->type = de->type; + ftwbuf->nameoff = bftw_child_nameoff(file); + } else if (file) { + parent = file->parent; + ftwbuf->depth = file->depth; + ftwbuf->type = file->type; + ftwbuf->nameoff = file->nameoff; + } + + if (parent) { + // Try to ensure the immediate parent is open, to avoid ENAMETOOLONG + if (bftw_ensure_open(&state->cache, parent, state->path) >= 0) { + ftwbuf->at_fd = parent->fd; + ftwbuf->at_path += ftwbuf->nameoff; + } else { + ftwbuf->error = errno; + } + } + + if (ftwbuf->depth == 0) { + // Compute the name offset for root paths like "foo/bar" + ftwbuf->nameoff = xbasename(ftwbuf->path) - ftwbuf->path; + } + + if (ftwbuf->error != 0) { + ftwbuf->type = BFS_ERROR; + return; + } + + int follow_flags = BFTW_FOLLOW_ALL; + if (ftwbuf->depth == 0) { + follow_flags |= BFTW_FOLLOW_ROOTS; + } + bool follow = state->flags & follow_flags; + if (follow) { + ftwbuf->stat_flags = BFS_STAT_TRYFOLLOW; + } + + const struct bfs_stat *statbuf = NULL; + if (bftw_need_stat(state)) { + statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (statbuf) { + ftwbuf->type = bfs_mode_to_type(statbuf->mode); + } else { + ftwbuf->type = BFS_ERROR; + ftwbuf->error = errno; + return; + } + } + + if (ftwbuf->type == BFS_DIR && (state->flags & BFTW_DETECT_CYCLES)) { + for (const struct bftw_file *ancestor = parent; ancestor; ancestor = ancestor->parent) { + if (ancestor->dev == statbuf->dev && ancestor->ino == statbuf->ino) { + ftwbuf->type = BFS_ERROR; + ftwbuf->error = ELOOP; + return; + } + } + } +} + +/** Check if the current file is a mount point. */ +static bool bftw_is_mount(struct bftw_state *state, const char *name) { + const struct bftw_file *file = state->file; + if (!file) { + return false; + } + + const struct bftw_file *parent = name ? file : file->parent; + if (!parent) { + return false; + } + + const struct BFTW *ftwbuf = &state->ftwbuf; + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + return statbuf && statbuf->dev != parent->dev; +} + +/** Fill file identity information from an ftwbuf. */ +static void bftw_fill_id(struct bftw_file *file, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = ftwbuf->stat_cache.buf; + if (!statbuf || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { + statbuf = ftwbuf->lstat_cache.buf; + } + if (statbuf) { + file->dev = statbuf->dev; + file->ino = statbuf->ino; + } +} + +/** + * Visit a path, invoking the callback. + */ +static enum bftw_action bftw_visit(struct bftw_state *state, const char *name, enum bftw_visit visit) { + if (bftw_update_path(state, name) != 0) { + state->error = errno; + return BFTW_STOP; + } + + const struct BFTW *ftwbuf = &state->ftwbuf; + bftw_init_ftwbuf(state, visit); + + // Never give the callback BFS_ERROR unless BFTW_RECOVER is specified + if (ftwbuf->type == BFS_ERROR && !(state->flags & BFTW_RECOVER)) { + state->error = ftwbuf->error; + return BFTW_STOP; + } + + if ((state->flags & BFTW_SKIP_MOUNTS) && bftw_is_mount(state, name)) { + return BFTW_PRUNE; + } + + enum bftw_action ret = state->callback(ftwbuf, state->ptr); + switch (ret) { + case BFTW_CONTINUE: + break; + case BFTW_PRUNE: + case BFTW_STOP: + goto done; + default: + state->error = EINVAL; + return BFTW_STOP; + } + + if (visit != BFTW_PRE || ftwbuf->type != BFS_DIR) { + ret = BFTW_PRUNE; + goto done; + } + + if ((state->flags & BFTW_PRUNE_MOUNTS) && bftw_is_mount(state, name)) { + ret = BFTW_PRUNE; + goto done; + } + +done: + if (state->file && !name) { + bftw_fill_id(state->file, ftwbuf); + } + + return ret; +} + +/** + * Push a new file onto the queue. + */ +static int bftw_push(struct bftw_state *state, const char *name, bool fill_id) { + struct bftw_file *parent = state->file; + struct bftw_file *file = bftw_file_new(parent, name); + if (!file) { + state->error = errno; + return -1; + } + + if (state->de) { + file->type = state->de->type; + } + + if (fill_id) { + bftw_fill_id(file, &state->ftwbuf); + } + + bftw_queue_push(&state->queue, file); + + return 0; +} + +/** + * Build the path to the current file. + */ +static int bftw_build_path(struct bftw_state *state) { + const struct bftw_file *file = state->file; + + size_t pathlen = file->nameoff + file->namelen; + if (dstresize(&state->path, pathlen) != 0) { + state->error = errno; + return -1; + } + + // Try to find a common ancestor with the existing path + const struct bftw_file *ancestor = state->previous; + while (ancestor && ancestor->depth > file->depth) { + ancestor = ancestor->parent; + } + + // Build the path backwards + while (file && file != ancestor) { + if (file->nameoff > 0) { + state->path[file->nameoff - 1] = '/'; + } + memcpy(state->path + file->nameoff, file->name, file->namelen); + + if (ancestor && ancestor->depth == file->depth) { + ancestor = ancestor->parent; + } + file = file->parent; + } + + state->previous = state->file; + return 0; +} + +/** + * Pop the next file from the queue. + */ +static int bftw_pop(struct bftw_state *state) { + if (!state->queue.head) { + return 0; + } + + state->file = bftw_queue_pop(&state->queue); + + if (bftw_build_path(state) != 0) { + return -1; + } + + return 1; +} + +/** + * Open the current directory. + */ +static void bftw_opendir(struct bftw_state *state) { + assert(!state->dir); + assert(!state->de); + + state->direrror = 0; + + state->dir = bftw_file_opendir(&state->cache, state->file, state->path); + if (!state->dir) { + state->direrror = errno; + } +} + +/** + * Read an entry from the current directory. + */ +static int bftw_readdir(struct bftw_state *state) { + if (!state->dir) { + return -1; + } + + int ret = bfs_readdir(state->dir, &state->de_storage); + if (ret > 0) { + state->de = &state->de_storage; + } else if (ret == 0) { + state->de = NULL; + } else { + state->de = NULL; + state->direrror = errno; + } + + return ret; +} + +/** + * Flags controlling which files get visited when done with a directory. + */ +enum bftw_gc_flags { + /** Don't visit anything. */ + BFTW_VISIT_NONE = 0, + /** Visit the file itself. */ + BFTW_VISIT_FILE = 1 << 0, + /** Visit the file's ancestors. */ + BFTW_VISIT_PARENTS = 1 << 1, + /** Visit both the file and its ancestors. */ + BFTW_VISIT_ALL = BFTW_VISIT_FILE | BFTW_VISIT_PARENTS, +}; + +/** + * Close the current directory. + */ +static enum bftw_action bftw_closedir(struct bftw_state *state, enum bftw_gc_flags flags) { + struct bftw_file *file = state->file; + enum bftw_action ret = BFTW_CONTINUE; + + if (state->dir) { + assert(file->fd >= 0); + + if (file->refcount > 1) { + // Keep the fd around if any subdirectories exist + file->fd = bfs_freedir(state->dir); + } else { + bfs_closedir(state->dir); + file->fd = -1; + } + + if (file->fd < 0) { + bftw_cache_remove(&state->cache, file); + } + } + + state->de = NULL; + state->dir = NULL; + + if (state->direrror != 0) { + if (flags & BFTW_VISIT_FILE) { + ret = bftw_visit(state, NULL, BFTW_PRE); + } else { + state->error = state->direrror; + } + state->direrror = 0; + } + + return ret; +} + +/** + * Finalize and free a file we're done with. + */ +static enum bftw_action bftw_gc_file(struct bftw_state *state, enum bftw_gc_flags flags) { + enum bftw_action ret = BFTW_CONTINUE; + + if (!(state->flags & BFTW_POST_ORDER)) { + flags = 0; + } + bool visit = flags & BFTW_VISIT_FILE; + + while (state->file) { + struct bftw_file *file = state->file; + if (--file->refcount > 0) { + state->file = NULL; + break; + } + + if (visit && bftw_visit(state, NULL, BFTW_POST) == BFTW_STOP) { + ret = BFTW_STOP; + flags &= ~BFTW_VISIT_PARENTS; + } + visit = flags & BFTW_VISIT_PARENTS; + + struct bftw_file *parent = file->parent; + if (state->previous == file) { + state->previous = parent; + } + bftw_file_free(&state->cache, file); + state->file = parent; + } + + return ret; +} + +/** + * Drain all the entries from a bftw_queue. + */ +static void bftw_drain_queue(struct bftw_state *state, struct bftw_queue *queue) { + while (queue->head) { + state->file = bftw_queue_pop(queue); + bftw_gc_file(state, BFTW_VISIT_NONE); + } +} + +/** + * Dispose of the bftw() state. + * + * @return + * The bftw() return value. + */ +static int bftw_state_destroy(struct bftw_state *state) { + dstrfree(state->path); + + bftw_closedir(state, BFTW_VISIT_NONE); + + bftw_gc_file(state, BFTW_VISIT_NONE); + bftw_drain_queue(state, &state->queue); + + bftw_cache_destroy(&state->cache); + + errno = state->error; + return state->error ? -1 : 0; +} + +/** Start a batch of files. */ +static void bftw_batch_start(struct bftw_state *state) { + if (state->strategy == BFTW_DFS) { + state->queue.target = &state->queue.head; + } + state->batch = state->queue.target; +} + +/** Finish adding a batch of files. */ +static void bftw_batch_finish(struct bftw_state *state) { + if (state->flags & BFTW_SORT) { + state->queue.target = bftw_sort_files(state->batch, state->queue.target); + } +} + +/** + * Streaming mode: visit files as they are encountered. + */ +static int bftw_stream(const struct bftw_args *args) { + struct bftw_state state; + if (bftw_state_init(&state, args) != 0) { + return -1; + } + + assert(!(state.flags & (BFTW_SORT | BFTW_BUFFER))); + + bftw_batch_start(&state); + for (size_t i = 0; i < args->npaths; ++i) { + const char *path = args->paths[i]; + + switch (bftw_visit(&state, path, BFTW_PRE)) { + case BFTW_CONTINUE: + break; + case BFTW_PRUNE: + continue; + case BFTW_STOP: + goto done; + } + + if (bftw_push(&state, path, true) != 0) { + goto done; + } + } + bftw_batch_finish(&state); + + while (bftw_pop(&state) > 0) { + bftw_opendir(&state); + + bftw_batch_start(&state); + while (bftw_readdir(&state) > 0) { + const char *name = state.de->name; + + switch (bftw_visit(&state, name, BFTW_PRE)) { + case BFTW_CONTINUE: + break; + case BFTW_PRUNE: + continue; + case BFTW_STOP: + goto done; + } + + if (bftw_push(&state, name, true) != 0) { + goto done; + } + } + bftw_batch_finish(&state); + + if (bftw_closedir(&state, BFTW_VISIT_ALL) == BFTW_STOP) { + goto done; + } + if (bftw_gc_file(&state, BFTW_VISIT_ALL) == BFTW_STOP) { + goto done; + } + } + +done: + return bftw_state_destroy(&state); +} + +/** + * Batching mode: queue up all children before visiting them. + */ +static int bftw_batch(const struct bftw_args *args) { + struct bftw_state state; + if (bftw_state_init(&state, args) != 0) { + return -1; + } + + bftw_batch_start(&state); + for (size_t i = 0; i < args->npaths; ++i) { + if (bftw_push(&state, args->paths[i], false) != 0) { + goto done; + } + } + bftw_batch_finish(&state); + + while (bftw_pop(&state) > 0) { + enum bftw_gc_flags gcflags = BFTW_VISIT_ALL; + + switch (bftw_visit(&state, NULL, BFTW_PRE)) { + case BFTW_CONTINUE: + break; + case BFTW_PRUNE: + gcflags &= ~BFTW_VISIT_FILE; + goto next; + case BFTW_STOP: + goto done; + } + + bftw_opendir(&state); + + bftw_batch_start(&state); + while (bftw_readdir(&state) > 0) { + if (bftw_push(&state, state.de->name, false) != 0) { + goto done; + } + } + bftw_batch_finish(&state); + + if (bftw_closedir(&state, gcflags) == BFTW_STOP) { + goto done; + } + + next: + if (bftw_gc_file(&state, gcflags) == BFTW_STOP) { + goto done; + } + } + +done: + return bftw_state_destroy(&state); +} + +/** Select bftw_stream() or bftw_batch() appropriately. */ +static int bftw_auto(const struct bftw_args *args) { + if (args->flags & (BFTW_SORT | BFTW_BUFFER)) { + return bftw_batch(args); + } else { + return bftw_stream(args); + } +} + +/** + * Iterative deepening search state. + */ +struct bftw_ids_state { + /** The wrapped callback. */ + bftw_callback *delegate; + /** The wrapped callback arguments. */ + void *ptr; + /** Which visit this search corresponds to. */ + enum bftw_visit visit; + /** Whether to override the bftw_visit. */ + bool force_visit; + /** The current minimum depth (inclusive). */ + size_t min_depth; + /** The current maximum depth (exclusive). */ + size_t max_depth; + /** The set of pruned paths. */ + struct trie pruned; + /** An error code to report. */ + int error; + /** Whether the bottom has been found. */ + bool bottom; + /** Whether to quit the search. */ + bool quit; +}; + +/** Iterative deepening callback function. */ +static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr) { + struct bftw_ids_state *state = ptr; + + if (state->force_visit) { + struct BFTW *mutbuf = (struct BFTW *)ftwbuf; + mutbuf->visit = state->visit; + } + + if (ftwbuf->type == BFS_ERROR) { + if (ftwbuf->depth + 1 >= state->min_depth) { + return state->delegate(ftwbuf, state->ptr); + } else { + return BFTW_PRUNE; + } + } + + if (ftwbuf->depth < state->min_depth) { + if (trie_find_str(&state->pruned, ftwbuf->path)) { + return BFTW_PRUNE; + } else { + return BFTW_CONTINUE; + } + } else if (state->visit == BFTW_POST) { + if (trie_find_str(&state->pruned, ftwbuf->path)) { + return BFTW_PRUNE; + } + } + + enum bftw_action ret = BFTW_CONTINUE; + if (ftwbuf->visit == state->visit) { + ret = state->delegate(ftwbuf, state->ptr); + } + + switch (ret) { + case BFTW_CONTINUE: + if (ftwbuf->type == BFS_DIR && ftwbuf->depth + 1 >= state->max_depth) { + state->bottom = false; + ret = BFTW_PRUNE; + } + break; + case BFTW_PRUNE: + if (ftwbuf->type == BFS_DIR) { + if (!trie_insert_str(&state->pruned, ftwbuf->path)) { + state->error = errno; + state->quit = true; + ret = BFTW_STOP; + } + } + break; + case BFTW_STOP: + state->quit = true; + break; + } + + return ret; +} + +/** Initialize iterative deepening state. */ +static void bftw_ids_init(const struct bftw_args *args, struct bftw_ids_state *state, struct bftw_args *ids_args) { + state->delegate = args->callback; + state->ptr = args->ptr; + state->visit = BFTW_PRE; + state->force_visit = false; + state->min_depth = 0; + state->max_depth = 1; + trie_init(&state->pruned); + state->error = 0; + state->bottom = false; + state->quit = false; + + *ids_args = *args; + ids_args->callback = bftw_ids_callback; + ids_args->ptr = state; + ids_args->flags &= ~BFTW_POST_ORDER; + ids_args->strategy = BFTW_DFS; +} + +/** Finish an iterative deepening search. */ +static int bftw_ids_finish(struct bftw_ids_state *state) { + int ret = 0; + + if (state->error) { + ret = -1; + } else { + state->error = errno; + } + + trie_destroy(&state->pruned); + + errno = state->error; + return ret; +} + +/** + * Iterative deepening bftw() wrapper. + */ +static int bftw_ids(const struct bftw_args *args) { + struct bftw_ids_state state; + struct bftw_args ids_args; + bftw_ids_init(args, &state, &ids_args); + + while (!state.quit && !state.bottom) { + state.bottom = true; + + if (bftw_auto(&ids_args) != 0) { + state.error = errno; + state.quit = true; + } + + ++state.min_depth; + ++state.max_depth; + } + + if (args->flags & BFTW_POST_ORDER) { + state.visit = BFTW_POST; + state.force_visit = true; + + while (!state.quit && state.min_depth > 0) { + --state.max_depth; + --state.min_depth; + + if (bftw_auto(&ids_args) != 0) { + state.error = errno; + state.quit = true; + } + } + } + + return bftw_ids_finish(&state); +} + +/** + * Exponential deepening bftw() wrapper. + */ +static int bftw_eds(const struct bftw_args *args) { + struct bftw_ids_state state; + struct bftw_args ids_args; + bftw_ids_init(args, &state, &ids_args); + + while (!state.quit && !state.bottom) { + state.bottom = true; + + if (bftw_auto(&ids_args) != 0) { + state.error = errno; + state.quit = true; + } + + state.min_depth = state.max_depth; + state.max_depth *= 2; + } + + if (!state.quit && (args->flags & BFTW_POST_ORDER)) { + state.visit = BFTW_POST; + state.min_depth = 0; + ids_args.flags |= BFTW_POST_ORDER; + + if (bftw_auto(&ids_args) != 0) { + state.error = errno; + } + } + + return bftw_ids_finish(&state); +} + +int bftw(const struct bftw_args *args) { + switch (args->strategy) { + case BFTW_BFS: + return bftw_auto(args); + case BFTW_DFS: + return bftw_batch(args); + case BFTW_IDS: + return bftw_ids(args); + case BFTW_EDS: + return bftw_eds(args); + } + + errno = EINVAL; + return -1; +} diff --git a/src/bftw.h b/src/bftw.h new file mode 100644 index 0000000..c458e1b --- /dev/null +++ b/src/bftw.h @@ -0,0 +1,223 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2021 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * A file-walking API based on nftw(). + */ + +#ifndef BFS_BFTW_H +#define BFS_BFTW_H + +#include "dir.h" +#include "stat.h" +#include <stddef.h> + +/** + * Possible visit occurrences. + */ +enum bftw_visit { + /** Pre-order visit. */ + BFTW_PRE, + /** Post-order visit. */ + BFTW_POST, +}; + +/** + * Cached bfs_stat() info for a file. + */ +struct bftw_stat { + /** A pointer to the bfs_stat() buffer, if available. */ + const struct bfs_stat *buf; + /** Storage for the bfs_stat() buffer, if needed. */ + struct bfs_stat storage; + /** The cached error code, if any. */ + int error; +}; + +/** + * Data about the current file for the bftw() callback. + */ +struct BFTW { + /** The path to the file. */ + const char *path; + /** The string offset of the filename. */ + size_t nameoff; + + /** The root path passed to bftw(). */ + const char *root; + /** The depth of this file in the traversal. */ + size_t depth; + /** Which visit this is. */ + enum bftw_visit visit; + + /** The file type. */ + enum bfs_type type; + /** The errno that occurred, if type == BFTW_ERROR. */ + int error; + + /** A parent file descriptor for the *at() family of calls. */ + int at_fd; + /** The path relative to at_fd for the *at() family of calls. */ + const char *at_path; + + /** Flags for bfs_stat(). */ + enum bfs_stat_flags stat_flags; + /** Cached bfs_stat() info for BFS_STAT_NOFOLLOW. */ + struct bftw_stat lstat_cache; + /** Cached bfs_stat() info for BFS_STAT_FOLLOW. */ + struct bftw_stat stat_cache; +}; + +/** + * Get bfs_stat() info for a file encountered during bftw(), caching the result + * whenever possible. + * + * @param ftwbuf + * bftw() data for the file to stat. + * @param flags + * flags for bfs_stat(). Pass ftwbuf->stat_flags for the default flags. + * @return + * A pointer to a bfs_stat() buffer, or NULL if the call failed. + */ +const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags); + +/** + * Get bfs_stat() info for a file encountered during bftw(), if it has already + * been cached. + * + * @param ftwbuf + * bftw() data for the file to stat. + * @param flags + * flags for bfs_stat(). Pass ftwbuf->stat_flags for the default flags. + * @return + * A pointer to a bfs_stat() buffer, or NULL if no stat info is cached. + */ +const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags); + +/** + * Get the type of a file encountered during bftw(), with flags controlling + * whether to follow links. This function will avoid calling bfs_stat() if + * possible. + * + * @param ftwbuf + * bftw() data for the file to check. + * @param flags + * flags for bfs_stat(). Pass ftwbuf->stat_flags for the default flags. + * @return + * The type of the file, or BFTW_ERROR if an error occurred. + */ +enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags); + +/** + * Walk actions returned by the bftw() callback. + */ +enum bftw_action { + /** Keep walking. */ + BFTW_CONTINUE, + /** Skip this path's children. */ + BFTW_PRUNE, + /** Stop walking. */ + BFTW_STOP, +}; + +/** + * Callback function type for bftw(). + * + * @param ftwbuf + * Data about the current file. + * @param ptr + * The pointer passed to bftw(). + * @return + * An action value. + */ +typedef enum bftw_action bftw_callback(const struct BFTW *ftwbuf, void *ptr); + +/** + * Flags that control bftw() behavior. + */ +enum bftw_flags { + /** stat() each encountered file. */ + BFTW_STAT = 1 << 0, + /** Attempt to recover from encountered errors. */ + BFTW_RECOVER = 1 << 1, + /** Visit directories in post-order as well as pre-order. */ + BFTW_POST_ORDER = 1 << 2, + /** If the initial path is a symbolic link, follow it. */ + BFTW_FOLLOW_ROOTS = 1 << 3, + /** Follow all symbolic links. */ + BFTW_FOLLOW_ALL = 1 << 4, + /** Detect directory cycles. */ + BFTW_DETECT_CYCLES = 1 << 5, + /** Skip mount points and their descendents. */ + BFTW_SKIP_MOUNTS = 1 << 6, + /** Skip the descendents of mount points. */ + BFTW_PRUNE_MOUNTS = 1 << 7, + /** Sort directory entries before processing them. */ + BFTW_SORT = 1 << 8, + /** Read each directory into memory before processing its children. */ + BFTW_BUFFER = 1 << 9, +}; + +/** + * Tree search strategies for bftw(). + */ +enum bftw_strategy { + /** Breadth-first search. */ + BFTW_BFS, + /** Depth-first search. */ + BFTW_DFS, + /** Iterative deepening search. */ + BFTW_IDS, + /** Exponential deepening search. */ + BFTW_EDS, +}; + +/** + * Structure for holding the arguments passed to bftw(). + */ +struct bftw_args { + /** The path(s) to start from. */ + const char **paths; + /** The number of starting paths. */ + size_t npaths; + /** The callback to invoke. */ + bftw_callback *callback; + /** A pointer which is passed to the callback. */ + void *ptr; + /** The maximum number of file descriptors to keep open. */ + int nopenfd; + /** Flags that control bftw() behaviour. */ + enum bftw_flags flags; + /** The search strategy to use. */ + enum bftw_strategy strategy; + /** The parsed mount table, if available. */ + const struct bfs_mtab *mtab; +}; + +/** + * Breadth First Tree Walk (or Better File Tree Walk). + * + * Like ftw(3) and nftw(3), this function walks a directory tree recursively, + * and invokes a callback for each path it encounters. + * + * @param args + * The arguments that control the walk. + * @return + * 0 on success, or -1 on failure. + */ +int bftw(const struct bftw_args *args); + +#endif // BFS_BFTW_H diff --git a/src/color.c b/src/color.c new file mode 100644 index 0000000..9e267da --- /dev/null +++ b/src/color.c @@ -0,0 +1,1125 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "color.h" +#include "bftw.h" +#include "dir.h" +#include "dstring.h" +#include "expr.h" +#include "fsade.h" +#include "stat.h" +#include "trie.h" +#include "util.h" +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +struct colors { + char *reset; + char *leftcode; + char *rightcode; + char *endcode; + char *clear_to_eol; + + char *bold; + char *gray; + char *red; + char *green; + char *yellow; + char *blue; + char *magenta; + char *cyan; + char *white; + + char *warning; + char *error; + + char *normal; + + char *file; + char *multi_hard; + char *executable; + char *capable; + char *setgid; + char *setuid; + + char *directory; + char *sticky; + char *other_writable; + char *sticky_other_writable; + + char *link; + char *orphan; + char *missing; + bool link_as_target; + + char *blockdev; + char *chardev; + char *door; + char *pipe; + char *socket; + + /** A mapping from color names (fi, di, ln, etc.) to struct fields. */ + struct trie names; + + /** A mapping from file extensions to colors. */ + struct trie ext_colors; +}; + +/** Initialize a color in the table. */ +static int init_color(struct colors *colors, const char *name, const char *value, char **field) { + if (value) { + *field = dstrdup(value); + if (!*field) { + return -1; + } + } else { + *field = NULL; + } + + struct trie_leaf *leaf = trie_insert_str(&colors->names, name); + if (leaf) { + leaf->value = field; + return 0; + } else { + return -1; + } +} + +/** Get a color from the table. */ +static char **get_color(const struct colors *colors, const char *name) { + const struct trie_leaf *leaf = trie_find_str(&colors->names, name); + if (leaf) { + return (char **)leaf->value; + } else { + return NULL; + } +} + +/** Set the value of a color. */ +static int set_color(struct colors *colors, const char *name, char *value) { + char **color = get_color(colors, name); + if (color) { + dstrfree(*color); + *color = value; + return 0; + } else { + return -1; + } +} + +/** + * Transform a file extension for fast lookups, by reversing and lowercasing it. + */ +static void extxfrm(char *ext) { + size_t len = strlen(ext); + for (size_t i = 0; i < len - i; ++i) { + char a = ext[i]; + char b = ext[len - i - 1]; + + // What's internationalization? Doesn't matter, this is what + // GNU ls does. Luckily, since there's no standard C way to + // casefold. Not using tolower() here since it respects the + // current locale, which GNU ls doesn't do. + if (a >= 'A' && a <= 'Z') { + a += 'a' - 'A'; + } + if (b >= 'A' && b <= 'Z') { + b += 'a' - 'A'; + } + + ext[i] = b; + ext[len - i - 1] = a; + } +} + +/** + * Set the color for an extension. + */ +static int set_ext_color(struct colors *colors, char *key, const char *value) { + extxfrm(key); + + // A later *.x should override any earlier *.x, *.y.x, etc. + struct trie_leaf *match; + while ((match = trie_find_postfix(&colors->ext_colors, key))) { + dstrfree(match->value); + trie_remove(&colors->ext_colors, match); + } + + struct trie_leaf *leaf = trie_insert_str(&colors->ext_colors, key); + if (leaf) { + leaf->value = (char *)value; + return 0; + } else { + return -1; + } +} + +/** + * Find a color by an extension. + */ +static const char *get_ext_color(const struct colors *colors, const char *filename) { + char *xfrm = strdup(filename); + if (!xfrm) { + return NULL; + } + extxfrm(xfrm); + + const struct trie_leaf *leaf = trie_find_prefix(&colors->ext_colors, xfrm); + free(xfrm); + if (leaf) { + return leaf->value; + } else { + return NULL; + } +} + +/** + * Parse a chunk of $LS_COLORS that may have escape sequences. The supported + * escapes are: + * + * \a, \b, \f, \n, \r, \t, \v: + * As in C + * \e: + * ESC (\033) + * \?: + * DEL (\177) + * \_: + * ' ' (space) + * \NNN: + * Octal + * \xNN: + * Hex + * ^C: + * Control character. + * + * See man dir_colors. + * + * @param value + * The value to parse. + * @param end + * The character that marks the end of the chunk. + * @param[out] next + * Will be set to the next chunk. + * @return + * The parsed chunk as a dstring. + */ +static char *unescape(const char *value, char end, const char **next) { + if (!value) { + goto fail; + } + + char *str = dstralloc(0); + if (!str) { + goto fail_str; + } + + const char *i; + for (i = value; *i && *i != end; ++i) { + unsigned char c = 0; + + switch (*i) { + case '\\': + switch (*++i) { + case 'a': + c = '\a'; + break; + case 'b': + c = '\b'; + break; + case 'e': + c = '\033'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case '?': + c = '\177'; + break; + case '_': + c = ' '; + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + while (i[1] >= '0' && i[1] <= '7') { + c <<= 3; + c |= *i++ - '0'; + } + c <<= 3; + c |= *i - '0'; + break; + + case 'X': + case 'x': + while (true) { + if (i[1] >= '0' && i[1] <= '9') { + c <<= 4; + c |= i[1] - '0'; + } else if (i[1] >= 'A' && i[1] <= 'F') { + c <<= 4; + c |= i[1] - 'A' + 0xA; + } else if (i[1] >= 'a' && i[1] <= 'f') { + c <<= 4; + c |= i[1] - 'a' + 0xA; + } else { + break; + } + ++i; + } + break; + + case '\0': + goto fail_str; + + default: + c = *i; + break; + } + break; + + case '^': + switch (*++i) { + case '?': + c = '\177'; + break; + case '\0': + goto fail_str; + default: + // CTRL masks bits 6 and 7 + c = *i & 0x1F; + break; + } + break; + + default: + c = *i; + break; + } + + if (dstrapp(&str, c) != 0) { + goto fail_str; + } + } + + if (*i) { + *next = i + 1; + } else { + *next = NULL; + } + + return str; + +fail_str: + dstrfree(str); +fail: + *next = NULL; + return NULL; +} + +/** Parse the GNU $LS_COLORS format. */ +static void parse_gnu_ls_colors(struct colors *colors, const char *ls_colors) { + for (const char *chunk = ls_colors, *next; chunk; chunk = next) { + if (chunk[0] == '*') { + char *key = unescape(chunk + 1, '=', &next); + if (!key) { + continue; + } + + char *value = unescape(next, ':', &next); + if (value) { + if (set_ext_color(colors, key, value) != 0) { + dstrfree(value); + } + } + + dstrfree(key); + } else { + const char *equals = strchr(chunk, '='); + if (!equals) { + break; + } + + char *value = unescape(equals + 1, ':', &next); + if (!value) { + continue; + } + + char *key = strndup(chunk, equals - chunk); + if (!key) { + dstrfree(value); + continue; + } + + // All-zero values should be treated like NULL, to fall + // back on any other relevant coloring for that file + if (strspn(value, "0") == strlen(value) + && strcmp(key, "rs") != 0 + && strcmp(key, "lc") != 0 + && strcmp(key, "rc") != 0 + && strcmp(key, "ec") != 0) { + dstrfree(value); + value = NULL; + } + + if (set_color(colors, key, value) != 0) { + dstrfree(value); + } + free(key); + } + } + + if (colors->link && strcmp(colors->link, "target") == 0) { + colors->link_as_target = true; + dstrfree(colors->link); + colors->link = NULL; + } +} + +struct colors *parse_colors() { + struct colors *colors = malloc(sizeof(struct colors)); + if (!colors) { + return NULL; + } + + trie_init(&colors->names); + trie_init(&colors->ext_colors); + + int ret = 0; + + // From man console_codes + + ret |= init_color(colors, "rs", "0", &colors->reset); + ret |= init_color(colors, "lc", "\033[", &colors->leftcode); + ret |= init_color(colors, "rc", "m", &colors->rightcode); + ret |= init_color(colors, "ec", NULL, &colors->endcode); + ret |= init_color(colors, "cl", "\033[K", &colors->clear_to_eol); + + ret |= init_color(colors, "bld", "01;39", &colors->bold); + ret |= init_color(colors, "gry", "01;30", &colors->gray); + ret |= init_color(colors, "red", "01;31", &colors->red); + ret |= init_color(colors, "grn", "01;32", &colors->green); + ret |= init_color(colors, "ylw", "01;33", &colors->yellow); + ret |= init_color(colors, "blu", "01;34", &colors->blue); + ret |= init_color(colors, "mag", "01;35", &colors->magenta); + ret |= init_color(colors, "cyn", "01;36", &colors->cyan); + ret |= init_color(colors, "wht", "01;37", &colors->white); + + ret |= init_color(colors, "wrn", "01;33", &colors->warning); + ret |= init_color(colors, "err", "01;31", &colors->error); + + // Defaults from man dir_colors + + ret |= init_color(colors, "no", NULL, &colors->normal); + + ret |= init_color(colors, "fi", NULL, &colors->file); + ret |= init_color(colors, "mh", NULL, &colors->multi_hard); + ret |= init_color(colors, "ex", "01;32", &colors->executable); + ret |= init_color(colors, "ca", "30;41", &colors->capable); + ret |= init_color(colors, "sg", "30;43", &colors->setgid); + ret |= init_color(colors, "su", "37;41", &colors->setuid); + + ret |= init_color(colors, "di", "01;34", &colors->directory); + ret |= init_color(colors, "st", "37;44", &colors->sticky); + ret |= init_color(colors, "ow", "34;42", &colors->other_writable); + ret |= init_color(colors, "tw", "30;42", &colors->sticky_other_writable); + + ret |= init_color(colors, "ln", "01;36", &colors->link); + ret |= init_color(colors, "or", NULL, &colors->orphan); + ret |= init_color(colors, "mi", NULL, &colors->missing); + colors->link_as_target = false; + + ret |= init_color(colors, "bd", "01;33", &colors->blockdev); + ret |= init_color(colors, "cd", "01;33", &colors->chardev); + ret |= init_color(colors, "do", "01;35", &colors->door); + ret |= init_color(colors, "pi", "33", &colors->pipe); + ret |= init_color(colors, "so", "01;35", &colors->socket); + + if (ret) { + free_colors(colors); + return NULL; + } + + parse_gnu_ls_colors(colors, getenv("LS_COLORS")); + parse_gnu_ls_colors(colors, getenv("BFS_COLORS")); + + return colors; +} + +void free_colors(struct colors *colors) { + if (colors) { + struct trie_leaf *leaf; + while ((leaf = trie_first_leaf(&colors->ext_colors))) { + dstrfree(leaf->value); + trie_remove(&colors->ext_colors, leaf); + } + trie_destroy(&colors->ext_colors); + + while ((leaf = trie_first_leaf(&colors->names))) { + char **field = leaf->value; + dstrfree(*field); + trie_remove(&colors->names, leaf); + } + trie_destroy(&colors->names); + + free(colors); + } +} + +CFILE *cfwrap(FILE *file, const struct colors *colors, bool close) { + CFILE *cfile = malloc(sizeof(*cfile)); + if (!cfile) { + return NULL; + } + + cfile->buffer = dstralloc(128); + if (!cfile->buffer) { + free(cfile); + return NULL; + } + + cfile->file = file; + cfile->close = close; + + if (isatty(fileno(file))) { + cfile->colors = colors; + } else { + cfile->colors = NULL; + } + + return cfile; +} + +int cfclose(CFILE *cfile) { + int ret = 0; + + if (cfile) { + dstrfree(cfile->buffer); + + if (cfile->close) { + ret = fclose(cfile->file); + } + + free(cfile); + } + + return ret; +} + +/** Check if a symlink is broken. */ +static bool is_link_broken(const struct BFTW *ftwbuf) { + if (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW) { + return xfaccessat(ftwbuf->at_fd, ftwbuf->at_path, F_OK) != 0; + } else { + return true; + } +} + +/** Get the color for a file. */ +static const char *file_color(const struct colors *colors, const char *filename, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + enum bfs_type type = bftw_type(ftwbuf, flags); + if (type == BFS_ERROR) { + goto error; + } + + const struct bfs_stat *statbuf = NULL; + const char *color = NULL; + + switch (type) { + case BFS_REG: + if (colors->setuid || colors->setgid || colors->executable || colors->multi_hard) { + statbuf = bftw_stat(ftwbuf, flags); + if (!statbuf) { + goto error; + } + } + + if (colors->setuid && (statbuf->mode & 04000)) { + color = colors->setuid; + } else if (colors->setgid && (statbuf->mode & 02000)) { + color = colors->setgid; + } else if (colors->capable && bfs_check_capabilities(ftwbuf) > 0) { + color = colors->capable; + } else if (colors->executable && (statbuf->mode & 00111)) { + color = colors->executable; + } else if (colors->multi_hard && statbuf->nlink > 1) { + color = colors->multi_hard; + } + + if (!color) { + color = get_ext_color(colors, filename); + } + + if (!color) { + color = colors->file; + } + + break; + + case BFS_DIR: + if (colors->sticky_other_writable || colors->other_writable || colors->sticky) { + statbuf = bftw_stat(ftwbuf, flags); + if (!statbuf) { + goto error; + } + } + + if (colors->sticky_other_writable && (statbuf->mode & 01002) == 01002) { + color = colors->sticky_other_writable; + } else if (colors->other_writable && (statbuf->mode & 00002)) { + color = colors->other_writable; + } else if (colors->sticky && (statbuf->mode & 01000)) { + color = colors->sticky; + } else { + color = colors->directory; + } + + break; + + case BFS_LNK: + if (colors->orphan && is_link_broken(ftwbuf)) { + color = colors->orphan; + } else { + color = colors->link; + } + break; + + case BFS_BLK: + color = colors->blockdev; + break; + case BFS_CHR: + color = colors->chardev; + break; + case BFS_FIFO: + color = colors->pipe; + break; + case BFS_SOCK: + color = colors->socket; + break; + case BFS_DOOR: + color = colors->door; + break; + + default: + break; + } + + if (!color) { + color = colors->normal; + } + + return color; + +error: + if (colors->missing) { + return colors->missing; + } else { + return colors->orphan; + } +} + +/** Print an ANSI escape sequence. */ +static int print_esc(CFILE *cfile, const char *esc) { + const struct colors *colors = cfile->colors; + + if (dstrdcat(&cfile->buffer, colors->leftcode) != 0) { + return -1; + } + if (dstrdcat(&cfile->buffer, esc) != 0) { + return -1; + } + if (dstrdcat(&cfile->buffer, colors->rightcode) != 0) { + return -1; + } + + return 0; +} + +/** Reset after an ANSI escape sequence. */ +static int print_reset(CFILE *cfile) { + const struct colors *colors = cfile->colors; + + if (colors->endcode) { + return dstrdcat(&cfile->buffer, colors->endcode); + } else { + return print_esc(cfile, colors->reset); + } +} + +/** Print a string with an optional color. */ +static int print_colored(CFILE *cfile, const char *esc, const char *str, size_t len) { + if (esc) { + if (print_esc(cfile, esc) != 0) { + return -1; + } + } + if (dstrncat(&cfile->buffer, str, len) != 0) { + return -1; + } + if (esc) { + if (print_reset(cfile) != 0) { + return -1; + } + } + + return 0; +} + +/** Find the offset of the first broken path component. */ +static ssize_t first_broken_offset(const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags, size_t max) { + ssize_t ret = max; + assert(ret >= 0); + + if (bftw_type(ftwbuf, flags) != BFS_ERROR) { + goto out; + } + + char *at_path; + int at_fd; + if (path == ftwbuf->path) { + if (ftwbuf->depth == 0) { + at_fd = AT_FDCWD; + at_path = dstrndup(path, max); + } else { + // The parent must have existed to get here + goto out; + } + } else { + // We're in print_link_target(), so resolve relative to the link's parent directory + at_fd = ftwbuf->at_fd; + if (at_fd == AT_FDCWD && path[0] != '/') { + at_path = dstrndup(ftwbuf->path, ftwbuf->nameoff); + if (at_path && dstrncat(&at_path, path, max) != 0) { + ret = -1; + goto out_path; + } + } else { + at_path = dstrndup(path, max); + } + } + + if (!at_path) { + ret = -1; + goto out; + } + + while (ret > 0) { + if (xfaccessat(at_fd, at_path, F_OK) == 0) { + break; + } + + size_t len = dstrlen(at_path); + while (ret && at_path[len - 1] == '/') { + --len, --ret; + } + while (ret && at_path[len - 1] != '/') { + --len, --ret; + } + + dstresize(&at_path, len); + } + +out_path: + dstrfree(at_path); +out: + return ret; +} + +/** Print the directories leading up to a file. */ +static int print_dirs_colored(CFILE *cfile, const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags, size_t nameoff) { + const struct colors *colors = cfile->colors; + + ssize_t broken = first_broken_offset(path, ftwbuf, flags, nameoff); + if (broken < 0) { + return -1; + } + + if (broken > 0) { + if (print_colored(cfile, colors->directory, path, broken) != 0) { + return -1; + } + } + + if ((size_t)broken < nameoff) { + const char *color = colors->missing; + if (!color) { + color = colors->orphan; + } + if (print_colored(cfile, color, path + broken, nameoff - broken) != 0) { + return -1; + } + } + + return 0; +} + +/** Print a file name with colors. */ +static int print_name_colored(CFILE *cfile, const char *name, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + const char *color = file_color(cfile->colors, name, ftwbuf, flags); + return print_colored(cfile, color, name, strlen(name)); +} + +/** Print a path with colors. */ +static int print_path_colored(CFILE *cfile, const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + size_t nameoff; + if (path == ftwbuf->path) { + nameoff = ftwbuf->nameoff; + } else { + nameoff = xbasename(path) - path; + } + + if (print_dirs_colored(cfile, path, ftwbuf, flags, nameoff) != 0) { + return -1; + } + + return print_name_colored(cfile, path + nameoff, ftwbuf, flags); +} + +/** Print the name of a file with the appropriate colors. */ +static int print_name(CFILE *cfile, const struct BFTW *ftwbuf) { + const char *name = ftwbuf->path + ftwbuf->nameoff; + + const struct colors *colors = cfile->colors; + if (!colors) { + return dstrcat(&cfile->buffer, name); + } + + enum bfs_stat_flags flags = ftwbuf->stat_flags; + if (colors->link_as_target && ftwbuf->type == BFS_LNK) { + flags = BFS_STAT_TRYFOLLOW; + } + + return print_name_colored(cfile, name, ftwbuf, flags); +} + +/** Print the path to a file with the appropriate colors. */ +static int print_path(CFILE *cfile, const struct BFTW *ftwbuf) { + const struct colors *colors = cfile->colors; + if (!colors) { + return dstrcat(&cfile->buffer, ftwbuf->path); + } + + enum bfs_stat_flags flags = ftwbuf->stat_flags; + if (colors->link_as_target && ftwbuf->type == BFS_LNK) { + flags = BFS_STAT_TRYFOLLOW; + } + + return print_path_colored(cfile, ftwbuf->path, ftwbuf, flags); +} + +/** Print a link target with the appropriate colors. */ +static int print_link_target(CFILE *cfile, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW); + size_t len = statbuf ? statbuf->size : 0; + + char *target = xreadlinkat(ftwbuf->at_fd, ftwbuf->at_path, len); + if (!target) { + return -1; + } + + int ret; + if (cfile->colors) { + ret = print_path_colored(cfile, target, ftwbuf, BFS_STAT_FOLLOW); + } else { + ret = dstrcat(&cfile->buffer, target); + } + + free(target); + return ret; +} + +/** Format some colored output to the buffer. */ +BFS_FORMATTER(2, 3) +static int cbuff(CFILE *cfile, const char *format, ...); + +/** Dump a parsed expression tree, for debugging. */ +static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose) { + if (dstrcat(&cfile->buffer, "(") != 0) { + return -1; + } + + const struct bfs_expr *lhs = NULL; + const struct bfs_expr *rhs = NULL; + + if (bfs_expr_has_children(expr)) { + lhs = expr->lhs; + rhs = expr->rhs; + + if (cbuff(cfile, "${red}%s${rs}", expr->argv[0]) < 0) { + return -1; + } + } else { + if (cbuff(cfile, "${blu}%s${rs}", expr->argv[0]) < 0) { + return -1; + } + } + + for (size_t i = 1; i < expr->argc; ++i) { + if (cbuff(cfile, " ${bld}%s${rs}", expr->argv[i]) < 0) { + return -1; + } + } + + if (verbose) { + double rate = 0.0, time = 0.0; + if (expr->evaluations) { + rate = 100.0*expr->successes/expr->evaluations; + time = (1.0e9*expr->elapsed.tv_sec + expr->elapsed.tv_nsec)/expr->evaluations; + } + if (cbuff(cfile, " [${ylw}%zu${rs}/${ylw}%zu${rs}=${ylw}%g%%${rs}; ${ylw}%gns${rs}]", + expr->successes, expr->evaluations, rate, time)) { + return -1; + } + } + + if (lhs) { + if (dstrcat(&cfile->buffer, " ") != 0) { + return -1; + } + if (print_expr(cfile, lhs, verbose) != 0) { + return -1; + } + } + + if (rhs) { + if (dstrcat(&cfile->buffer, " ") != 0) { + return -1; + } + if (print_expr(cfile, rhs, verbose) != 0) { + return -1; + } + } + + if (dstrcat(&cfile->buffer, ")") != 0) { + return -1; + } + + return 0; +} + +static int cvbuff(CFILE *cfile, const char *format, va_list args) { + const struct colors *colors = cfile->colors; + int error = errno; + + for (const char *i = format; *i; ++i) { + size_t verbatim = strcspn(i, "%$"); + if (dstrncat(&cfile->buffer, i, verbatim) != 0) { + return -1; + } + + i += verbatim; + switch (*i) { + case '%': + switch (*++i) { + case '%': + if (dstrapp(&cfile->buffer, '%') != 0) { + return -1; + } + break; + + case 'c': + if (dstrapp(&cfile->buffer, va_arg(args, int)) != 0) { + return -1; + } + break; + + case 'd': + if (dstrcatf(&cfile->buffer, "%d", va_arg(args, int)) != 0) { + return -1; + } + break; + + case 'g': + if (dstrcatf(&cfile->buffer, "%g", va_arg(args, double)) != 0) { + return -1; + } + break; + + case 's': + if (dstrcat(&cfile->buffer, va_arg(args, const char *)) != 0) { + return -1; + } + break; + + case 'z': + ++i; + if (*i != 'u') { + goto invalid; + } + if (dstrcatf(&cfile->buffer, "%zu", va_arg(args, size_t)) != 0) { + return -1; + } + break; + + case 'm': + if (dstrcat(&cfile->buffer, strerror(error)) != 0) { + return -1; + } + break; + + case 'p': + switch (*++i) { + case 'F': + if (print_name(cfile, va_arg(args, const struct BFTW *)) != 0) { + return -1; + } + break; + + case 'P': + if (print_path(cfile, va_arg(args, const struct BFTW *)) != 0) { + return -1; + } + break; + + case 'L': + if (print_link_target(cfile, va_arg(args, const struct BFTW *)) != 0) { + return -1; + } + break; + + case 'e': + if (print_expr(cfile, va_arg(args, const struct bfs_expr *), false) != 0) { + return -1; + } + break; + case 'E': + if (print_expr(cfile, va_arg(args, const struct bfs_expr *), true) != 0) { + return -1; + } + break; + + default: + goto invalid; + } + + break; + + default: + goto invalid; + } + break; + + case '$': + switch (*++i) { + case '$': + if (dstrapp(&cfile->buffer, '$') != 0) { + return -1; + } + break; + + case '{': { + ++i; + const char *end = strchr(i, '}'); + if (!end) { + goto invalid; + } + if (!colors) { + i = end; + break; + } + + size_t len = end - i; + char name[len + 1]; + memcpy(name, i, len); + name[len] = '\0'; + + char **esc = get_color(colors, name); + if (!esc) { + goto invalid; + } + if (*esc) { + if (print_esc(cfile, *esc) != 0) { + return -1; + } + } + + i = end; + break; + } + + default: + goto invalid; + } + break; + + default: + return 0; + } + } + + return 0; + +invalid: + assert(!"Invalid format string"); + errno = EINVAL; + return -1; +} + +static int cbuff(CFILE *cfile, const char *format, ...) { + va_list args; + va_start(args, format); + int ret = cvbuff(cfile, format, args); + va_end(args); + return ret; +} + +int cvfprintf(CFILE *cfile, const char *format, va_list args) { + assert(dstrlen(cfile->buffer) == 0); + + int ret = -1; + if (cvbuff(cfile, format, args) == 0) { + size_t len = dstrlen(cfile->buffer); + if (fwrite(cfile->buffer, 1, len, cfile->file) == len) { + ret = 0; + } + } + + dstresize(&cfile->buffer, 0); + return ret; +} + +int cfprintf(CFILE *cfile, const char *format, ...) { + va_list args; + va_start(args, format); + int ret = cvfprintf(cfile, format, args); + va_end(args); + return ret; +} diff --git a/src/color.h b/src/color.h new file mode 100644 index 0000000..edf1ef7 --- /dev/null +++ b/src/color.h @@ -0,0 +1,120 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2021 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Utilities for colored output on ANSI terminals. + */ + +#ifndef BFS_COLOR_H +#define BFS_COLOR_H + +#include "util.h" +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> + +/** + * A color scheme. + */ +struct colors; + +/** + * Parse a color table. + * + * @return The parsed color table. + */ +struct colors *parse_colors(void); + +/** + * Free a color table. + * + * @param colors + * The color table to free. + */ +void free_colors(struct colors *colors); + +/** + * A file/stream with associated colors. + */ +typedef struct CFILE { + /** The underlying file/stream. */ + FILE *file; + /** The color table to use, if any. */ + const struct colors *colors; + /** A buffer for colored formatting. */ + char *buffer; + /** Whether to close the underlying stream. */ + bool close; +} CFILE; + +/** + * Wrap an existing file into a colored stream. + * + * @param file + * The underlying file. + * @param colors + * The color table to use if file is a TTY. + * @param close + * Whether to close the underlying stream when this stream is closed. + * @return + * A colored wrapper around file. + */ +CFILE *cfwrap(FILE *file, const struct colors *colors, bool close); + +/** + * Close a colored file. + * + * @param cfile + * The colored file to close. + * @return + * 0 on success, -1 on failure. + */ +int cfclose(CFILE *cfile); + +/** + * Colored, formatted output. + * + * @param cfile + * The colored stream to print to. + * @param format + * A printf()-style format string, supporting these format specifiers: + * + * %c: A single character + * %d: An integer + * %g: A double + * %s: A string + * %zu: A size_t + * %m: strerror(errno) + * %pF: A colored file name, from a const struct BFTW * argument + * %pP: A colored file path, from a const struct BFTW * argument + * %pL: A colored link target, from a const struct BFTW * argument + * %pe: Dump a const struct bfs_expr *, for debugging. + * %pE: Dump a const struct bfs_expr * in verbose form, for debugging. + * %%: A literal '%' + * ${cc}: Change the color to 'cc' + * $$: A literal '$' + * @return + * 0 on success, -1 on failure. + */ +BFS_FORMATTER(2, 3) +int cfprintf(CFILE *cfile, const char *format, ...); + +/** + * cfprintf() variant that takes a va_list. + */ +int cvfprintf(CFILE *cfile, const char *format, va_list args); + +#endif // BFS_COLOR_H diff --git a/src/ctx.c b/src/ctx.c new file mode 100644 index 0000000..8ba2f38 --- /dev/null +++ b/src/ctx.c @@ -0,0 +1,311 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "ctx.h" +#include "color.h" +#include "darray.h" +#include "diag.h" +#include "expr.h" +#include "mtab.h" +#include "pwcache.h" +#include "stat.h" +#include "trie.h" +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> + +const char *debug_flag_name(enum debug_flags flag) { + switch (flag) { + case DEBUG_COST: + return "cost"; + case DEBUG_EXEC: + return "exec"; + case DEBUG_OPT: + return "opt"; + case DEBUG_RATES: + return "rates"; + case DEBUG_SEARCH: + return "search"; + case DEBUG_STAT: + return "stat"; + case DEBUG_TREE: + return "tree"; + + case DEBUG_ALL: + break; + } + + assert(!"Unrecognized debug flag"); + return "???"; +} + +struct bfs_ctx *bfs_ctx_new(void) { + struct bfs_ctx *ctx = malloc(sizeof(*ctx)); + if (!ctx) { + return NULL; + } + + ctx->argv = NULL; + ctx->paths = NULL; + ctx->expr = NULL; + ctx->exclude = NULL; + + ctx->mindepth = 0; + ctx->maxdepth = INT_MAX; + ctx->flags = BFTW_RECOVER; + ctx->strategy = BFTW_BFS; + ctx->optlevel = 3; + ctx->debug = 0; + ctx->ignore_races = false; + ctx->posixly_correct = false; + ctx->status = false; + ctx->unique = false; + ctx->warn = false; + ctx->xargs_safe = false; + + ctx->colors = NULL; + ctx->colors_error = 0; + ctx->cout = NULL; + ctx->cerr = NULL; + + ctx->users = NULL; + ctx->users_error = 0; + ctx->groups = NULL; + ctx->groups_error = 0; + + ctx->mtab = NULL; + ctx->mtab_error = 0; + + trie_init(&ctx->files); + ctx->nfiles = 0; + + struct rlimit rl; + if (getrlimit(RLIMIT_NOFILE, &rl) == 0) { + ctx->nofile_soft = rl.rlim_cur; + ctx->nofile_hard = rl.rlim_max; + } else { + ctx->nofile_soft = 1024; + ctx->nofile_hard = RLIM_INFINITY; + } + + return ctx; +} + +const struct bfs_users *bfs_ctx_users(const struct bfs_ctx *ctx) { + struct bfs_ctx *mut = (struct bfs_ctx *)ctx; + + if (mut->users_error) { + errno = mut->users_error; + } else if (!mut->users) { + mut->users = bfs_users_parse(); + if (!mut->users) { + mut->users_error = errno; + } + } + + return mut->users; +} + +const struct bfs_groups *bfs_ctx_groups(const struct bfs_ctx *ctx) { + struct bfs_ctx *mut = (struct bfs_ctx *)ctx; + + if (mut->groups_error) { + errno = mut->groups_error; + } else if (!mut->groups) { + mut->groups = bfs_groups_parse(); + if (!mut->groups) { + mut->groups_error = errno; + } + } + + return mut->groups; +} + +const struct bfs_mtab *bfs_ctx_mtab(const struct bfs_ctx *ctx) { + struct bfs_ctx *mut = (struct bfs_ctx *)ctx; + + if (mut->mtab_error) { + errno = mut->mtab_error; + } else if (!mut->mtab) { + mut->mtab = bfs_mtab_parse(); + if (!mut->mtab) { + mut->mtab_error = errno; + } + } + + return mut->mtab; +} + +/** + * An open file tracked by the bfs context. + */ +struct bfs_ctx_file { + /** The file itself. */ + CFILE *cfile; + /** The path to the file (for diagnostics). */ + const char *path; +}; + +CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, CFILE *cfile, const char *path) { + struct bfs_stat sb; + if (bfs_stat(fileno(cfile->file), NULL, 0, &sb) != 0) { + return NULL; + } + + bfs_file_id id; + bfs_stat_id(&sb, &id); + + struct trie_leaf *leaf = trie_insert_mem(&ctx->files, id, sizeof(id)); + if (!leaf) { + return NULL; + } + + struct bfs_ctx_file *ctx_file = leaf->value; + if (ctx_file) { + ctx_file->path = path; + return ctx_file->cfile; + } + + leaf->value = ctx_file = malloc(sizeof(*ctx_file)); + if (!ctx_file) { + trie_remove(&ctx->files, leaf); + return NULL; + } + + ctx_file->cfile = cfile; + ctx_file->path = path; + + if (cfile != ctx->cout && cfile != ctx->cerr) { + ++ctx->nfiles; + } + + return cfile; +} + +void bfs_ctx_flush(const struct bfs_ctx *ctx) { + // Before executing anything, flush all open streams. This ensures that + // - the user sees everything relevant before an -ok[dir] prompt + // - output from commands is interleaved consistently with bfs + // - executed commands can rely on I/O from other bfs actions + // + // We do not check errors here, but they will be caught at cleanup time + // with ferror(). + fflush(NULL); +} + +/** Flush a file and report any errors. */ +static int bfs_ctx_fflush(CFILE *cfile) { + int ret = 0, error = 0; + if (ferror(cfile->file)) { + ret = -1; + error = EIO; + } + if (fflush(cfile->file) != 0) { + ret = -1; + error = errno; + } + + errno = error; + return ret; +} + +/** Close a file tracked by the bfs context. */ +static int bfs_ctx_fclose(struct bfs_ctx *ctx, struct bfs_ctx_file *ctx_file) { + CFILE *cfile = ctx_file->cfile; + + if (cfile == ctx->cout) { + // Will be checked later + return 0; + } else if (cfile == ctx->cerr) { + // Writes to stderr are allowed to fail silently, unless the same file was used by + // -fprint, -fls, etc. + if (ctx_file->path) { + return bfs_ctx_fflush(cfile); + } else { + return 0; + } + } + + int ret = 0, error = 0; + if (ferror(cfile->file)) { + ret = -1; + error = EIO; + } + if (cfclose(cfile) != 0) { + ret = -1; + error = errno; + } + + errno = error; + return ret; +} + +int bfs_ctx_free(struct bfs_ctx *ctx) { + int ret = 0; + + if (ctx) { + CFILE *cout = ctx->cout; + CFILE *cerr = ctx->cerr; + + bfs_expr_free(ctx->exclude); + bfs_expr_free(ctx->expr); + + bfs_mtab_free(ctx->mtab); + + bfs_groups_free(ctx->groups); + bfs_users_free(ctx->users); + + struct trie_leaf *leaf; + while ((leaf = trie_first_leaf(&ctx->files))) { + struct bfs_ctx_file *ctx_file = leaf->value; + + if (bfs_ctx_fclose(ctx, ctx_file) != 0) { + if (cerr) { + bfs_error(ctx, "'%s': %m.\n", ctx_file->path); + } + ret = -1; + } + + free(ctx_file); + trie_remove(&ctx->files, leaf); + } + trie_destroy(&ctx->files); + + if (cout && bfs_ctx_fflush(cout) != 0) { + if (cerr) { + bfs_error(ctx, "standard output: %m.\n"); + } + ret = -1; + } + + cfclose(cout); + cfclose(cerr); + + free_colors(ctx->colors); + + for (size_t i = 0; i < darray_length(ctx->paths); ++i) { + free((char *)ctx->paths[i]); + } + darray_free(ctx->paths); + + free(ctx->argv); + free(ctx); + } + + return ret; +} diff --git a/src/ctx.h b/src/ctx.h new file mode 100644 index 0000000..3ad7f85 --- /dev/null +++ b/src/ctx.h @@ -0,0 +1,212 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * bfs execution context. + */ + +#ifndef BFS_CTX_H +#define BFS_CTX_H + +#include "bftw.h" +#include "trie.h" +#include <stdbool.h> +#include <sys/resource.h> + +/** + * Various debugging flags. + */ +enum debug_flags { + /** Print cost estimates. */ + DEBUG_COST = 1 << 0, + /** Print executed command details. */ + DEBUG_EXEC = 1 << 1, + /** Print optimization details. */ + DEBUG_OPT = 1 << 2, + /** Print rate information. */ + DEBUG_RATES = 1 << 3, + /** Trace the filesystem traversal. */ + DEBUG_SEARCH = 1 << 4, + /** Trace all stat() calls. */ + DEBUG_STAT = 1 << 5, + /** Print the parse tree. */ + DEBUG_TREE = 1 << 6, + /** All debug flags. */ + DEBUG_ALL = (1 << 7) - 1, +}; + +/** + * Convert a debug flag to a string. + */ +const char *debug_flag_name(enum debug_flags flag); + +/** + * The execution context for bfs. + */ +struct bfs_ctx { + /** The number of command line arguments. */ + size_t argc; + /** The unparsed command line arguments. */ + char **argv; + + /** The root paths. */ + const char **paths; + /** The main command line expression. */ + struct bfs_expr *expr; + /** An expression for files to filter out. */ + struct bfs_expr *exclude; + + /** -mindepth option. */ + int mindepth; + /** -maxdepth option. */ + int maxdepth; + + /** bftw() flags. */ + enum bftw_flags flags; + /** bftw() search strategy. */ + enum bftw_strategy strategy; + + /** Optimization level (-O). */ + int optlevel; + /** Debugging flags (-D). */ + enum debug_flags debug; + /** Whether to ignore deletions that race with bfs (-ignore_readdir_race). */ + bool ignore_races; + /** Whether to follow POSIXisms more closely ($POSIXLY_CORRECT). */ + bool posixly_correct; + /** Whether to show a status bar (-status). */ + bool status; + /** Whether to only return unique files (-unique). */ + bool unique; + /** Whether to print warnings (-warn/-nowarn). */ + bool warn; + /** Whether to only handle paths with xargs-safe characters (-X). */ + bool xargs_safe; + + /** Color data. */ + struct colors *colors; + /** The error that occurred parsing the color table, if any. */ + int colors_error; + /** Colored stdout. */ + struct CFILE *cout; + /** Colored stderr. */ + struct CFILE *cerr; + + /** User table. */ + struct bfs_users *users; + /** The error that occurred parsing the user table, if any. */ + int users_error; + /** Group table. */ + struct bfs_groups *groups; + /** The error that occurred parsing the group table, if any. */ + int groups_error; + + /** Table of mounted file systems. */ + struct bfs_mtab *mtab; + /** The error that occurred parsing the mount table, if any. */ + int mtab_error; + + /** All the files owned by the context. */ + struct trie files; + /** The number of files owned by the context. */ + int nfiles; + + /** The initial RLIMIT_NOFILE soft limit. */ + rlim_t nofile_soft; + /** The initial RLIMIT_NOFILE hard limit. */ + rlim_t nofile_hard; +}; + +/** + * @return + * A new bfs context, or NULL on failure. + */ +struct bfs_ctx *bfs_ctx_new(void); + +/** + * Get the users table. + * + * @param ctx + * The bfs context. + * @return + * The cached users table, or NULL on failure. + */ +const struct bfs_users *bfs_ctx_users(const struct bfs_ctx *ctx); + +/** + * Get the groups table. + * + * @param ctx + * The bfs context. + * @return + * The cached groups table, or NULL on failure. + */ +const struct bfs_groups *bfs_ctx_groups(const struct bfs_ctx *ctx); + +/** + * Get the mount table. + * + * @param ctx + * The bfs context. + * @return + * The cached mount table, or NULL on failure. + */ +const struct bfs_mtab *bfs_ctx_mtab(const struct bfs_ctx *ctx); + +/** + * Deduplicate an opened file. + * + * @param ctx + * The bfs context. + * @param cfile + * The opened file. + * @param path + * The path to the opened file (or NULL for standard streams). + * @return + * If the same file was opened previously, that file is returned. If cfile is a new file, + * cfile itself is returned. If an error occurs, NULL is returned. + */ +struct CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, struct CFILE *cfile, const char *path); + +/** + * Flush any caches for consistency with external processes. + * + * @param ctx + * The bfs context. + */ +void bfs_ctx_flush(const struct bfs_ctx *ctx); + +/** + * Dump the parsed command line. + * + * @param ctx + * The bfs context. + * @param flag + * The -D flag that triggered the dump. + */ +void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag); + +/** + * Free a bfs context. + * + * @param ctx + * The context to free. + * @return + * 0 on success, -1 if any errors occurred. + */ +int bfs_ctx_free(struct bfs_ctx *ctx); + +#endif // BFS_CTX_H diff --git a/src/darray.c b/src/darray.c new file mode 100644 index 0000000..6585d30 --- /dev/null +++ b/src/darray.c @@ -0,0 +1,103 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2019-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "darray.h" +#include <stdlib.h> +#include <string.h> + +/** + * The darray header. + */ +struct darray { + /** The current capacity of the array, as a count of elements. */ + size_t capacity; + /** The current length of the array. */ + size_t length; + + // The array elements are stored after this header in memory. Not using + // a flexible array member to avoid worrying about strict aliasing. We + // assume that 2*sizeof(size_t) keeps any memory allocation suitably + // aligned for the element type. +}; + +/** Get the header for a darray. */ +static struct darray *darray_header(const void *da) { + return (struct darray *)da - 1; +} + +/** Get the array from a darray header. */ +static char *darray_data(struct darray *header) { + return (char *)(header + 1); +} + +size_t darray_length(const void *da) { + if (da) { + return darray_header(da)->length; + } else { + return 0; + } +} + +void *darray_push(void *da, const void *item, size_t size) { + struct darray *header; + if (da) { + header = darray_header(da); + } else { + header = malloc(sizeof(*header) + size); + if (!header) { + return NULL; + } + header->capacity = 1; + header->length = 0; + } + + size_t capacity = header->capacity; + size_t i = header->length++; + if (i >= capacity) { + capacity *= 2; + header = realloc(header, sizeof(*header) + capacity*size); + if (!header) { + // This failure will be detected by darray_check() + return da; + } + header->capacity = capacity; + } + + char *data = darray_data(header); + memcpy(data + i*size, item, size); + return data; +} + +int darray_check(void *da) { + if (!da) { + return -1; + } + + struct darray *header = darray_header(da); + if (header->length <= header->capacity) { + return 0; + } else { + // realloc() failed in darray_push(), so reset the length and report the failure + header->length = header->capacity; + return -1; + } +} + +void darray_free(void *da) { + if (da) { + free(darray_header(da)); + } +} diff --git a/src/darray.h b/src/darray.h new file mode 100644 index 0000000..4464381 --- /dev/null +++ b/src/darray.h @@ -0,0 +1,110 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2019-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * A dynamic array library. + * + * darrays are represented by a simple pointer to the array element type, like + * any other array. Behind the scenes, the capacity and current length of the + * array are stored along with it. NULL is a valid way to initialize an empty + * darray: + * + * int *darray = NULL; + * + * To append an element to a darray, use the DARRAY_PUSH macro: + * + * int e = 42; + * if (DARRAY_PUSH(&darray, &e) != 0) { + * // Report the error... + * } + * + * The length can be retrieved by darray_length(). Iterating over the array + * works like normal arrays: + * + * for (size_t i = 0; i < darray_length(darray); ++i) { + * printf("%d\n", darray[i]); + * } + * + * To free a darray, use darray_free(): + * + * darray_free(darray); + */ + +#ifndef BFS_DARRAY_H +#define BFS_DARRAY_H + +#include <stddef.h> + +/** + * Get the length of a darray. + * + * @param da + * The array in question. + * @return + * The length of the array. + */ +size_t darray_length(const void *da); + +/** + * @internal Use DARRAY_PUSH(). + * + * Push an element into a darray. + * + * @param da + * The array to append to. + * @param item + * The item to append. + * @param size + * The size of the item. + * @return + * The (new) location of the array. + */ +void *darray_push(void *da, const void *item, size_t size); + +/** + * @internal Use DARRAY_PUSH(). + * + * Check if the last darray_push() call failed. + * + * @param da + * The darray to check. + * @return + * 0 on success, -1 on failure. + */ +int darray_check(void *da); + +/** + * Free a darray. + * + * @param da + * The darray to free. + */ +void darray_free(void *da); + +/** + * Push an item into a darray. + * + * @param da + * The array to append to. + * @param item + * A pointer to the item to append. + * @return + * 0 on success, -1 on failure. + */ +#define DARRAY_PUSH(da, item) \ + (darray_check(*(da) = darray_push(*(da), (item), sizeof(**(da) = *(item))))) + +#endif // BFS_DARRAY_H diff --git a/src/diag.c b/src/diag.c new file mode 100644 index 0000000..27848f1 --- /dev/null +++ b/src/diag.c @@ -0,0 +1,233 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2019-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "diag.h" +#include "ctx.h" +#include "color.h" +#include "expr.h" +#include "util.h" +#include <assert.h> +#include <errno.h> +#include <stdarg.h> + +void bfs_perror(const struct bfs_ctx *ctx, const char *str) { + bfs_error(ctx, "%s: %m.\n", str); +} + +void bfs_error(const struct bfs_ctx *ctx, const char *format, ...) { + va_list args; + va_start(args, format); + bfs_verror(ctx, format, args); + va_end(args); +} + +bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...) { + va_list args; + va_start(args, format); + bool ret = bfs_vwarning(ctx, format, args); + va_end(args); + return ret; +} + +bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...) { + va_list args; + va_start(args, format); + bool ret = bfs_vdebug(ctx, flag, format, args); + va_end(args); + return ret; +} + +void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args) { + int error = errno; + + bfs_error_prefix(ctx); + + errno = error; + cvfprintf(ctx->cerr, format, args); +} + +bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args) { + int error = errno; + + if (bfs_warning_prefix(ctx)) { + errno = error; + cvfprintf(ctx->cerr, format, args); + return true; + } else { + return false; + } +} + +bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args) { + int error = errno; + + if (bfs_debug_prefix(ctx, flag)) { + errno = error; + cvfprintf(ctx->cerr, format, args); + return true; + } else { + return false; + } +} + +void bfs_error_prefix(const struct bfs_ctx *ctx) { + cfprintf(ctx->cerr, "${bld}%s:${rs} ${err}error:${rs} ", xbasename(ctx->argv[0])); +} + +bool bfs_warning_prefix(const struct bfs_ctx *ctx) { + if (ctx->warn) { + cfprintf(ctx->cerr, "${bld}%s:${rs} ${wrn}warning:${rs} ", xbasename(ctx->argv[0])); + return true; + } else { + return false; + } +} + +bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag) { + if (ctx->debug & flag) { + cfprintf(ctx->cerr, "${bld}%s:${rs} ${cyn}-D %s${rs}: ", xbasename(ctx->argv[0]), debug_flag_name(flag)); + return true; + } else { + return false; + } +} + +/** Recursive part of highlight_expr(). */ +static bool highlight_expr_recursive(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool *args) { + if (!expr) { + return false; + } + + bool ret = false; + + if (!expr->synthetic) { + size_t i = expr->argv - ctx->argv; + for (size_t j = 0; j < expr->argc; ++j) { + assert(i + j < ctx->argc); + args[i + j] = true; + ret = true; + } + } + + if (bfs_expr_has_children(expr)) { + ret |= highlight_expr_recursive(ctx, expr->lhs, args); + ret |= highlight_expr_recursive(ctx, expr->rhs, args); + } + + return ret; +} + +/** Highlight an expression in the command line. */ +static bool highlight_expr(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool *args) { + for (size_t i = 0; i < ctx->argc; ++i) { + args[i] = false; + } + + return highlight_expr_recursive(ctx, expr, args); +} + +/** Print a highlighted portion of the command line. */ +static void bfs_argv_diag(const struct bfs_ctx *ctx, const bool *args, bool warning) { + if (warning) { + bfs_warning_prefix(ctx); + } else { + bfs_error_prefix(ctx); + } + + size_t max_argc = 0; + for (size_t i = 0; i < ctx->argc; ++i) { + if (i > 0) { + cfprintf(ctx->cerr, " "); + } + + if (args[i]) { + max_argc = i + 1; + cfprintf(ctx->cerr, "${bld}%s${rs}", ctx->argv[i]); + } else { + cfprintf(ctx->cerr, "%s", ctx->argv[i]); + } + } + + cfprintf(ctx->cerr, "\n"); + + if (warning) { + bfs_warning_prefix(ctx); + } else { + bfs_error_prefix(ctx); + } + + for (size_t i = 0; i < max_argc; ++i) { + if (i > 0) { + if (args[i - 1] && args[i]) { + cfprintf(ctx->cerr, "~"); + } else { + cfprintf(ctx->cerr, " "); + } + } + + if (args[i] && (i == 0 || !args[i - 1])) { + if (warning) { + cfprintf(ctx->cerr, "${wrn}"); + } else { + cfprintf(ctx->cerr, "${err}"); + } + } + + size_t len = xstrwidth(ctx->argv[i]); + for (size_t j = 0; j < len; ++j) { + if (args[i]) { + cfprintf(ctx->cerr, "~"); + } else { + cfprintf(ctx->cerr, " "); + } + } + + if (args[i] && (i + 1 >= max_argc || !args[i + 1])) { + cfprintf(ctx->cerr, "${rs}"); + } + } + + cfprintf(ctx->cerr, "\n"); +} + +void bfs_argv_error(const struct bfs_ctx *ctx, const bool *args) { + bfs_argv_diag(ctx, args, false); +} + +void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr) { + bool args[ctx->argc]; + if (highlight_expr(ctx, expr, args)) { + bfs_argv_error(ctx, args); + } +} + +bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool *args) { + if (!ctx->warn) { + return false; + } + + bfs_argv_diag(ctx, args, true); + return true; +} + +bool bfs_expr_warning(const struct bfs_ctx *ctx, const struct bfs_expr *expr) { + bool args[ctx->argc]; + if (highlight_expr(ctx, expr, args)) { + return bfs_argv_warning(ctx, args); + } + + return false; +} diff --git a/src/diag.h b/src/diag.h new file mode 100644 index 0000000..39129cc --- /dev/null +++ b/src/diag.h @@ -0,0 +1,108 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2019-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Formatters for diagnostic messages. + */ + +#ifndef BFS_DIAG_H +#define BFS_DIAG_H + +#include "ctx.h" +#include "util.h" +#include <stdarg.h> +#include <stdbool.h> + +struct bfs_expr; + +/** + * Like perror(), but decorated like bfs_error(). + */ +void bfs_perror(const struct bfs_ctx *ctx, const char *str); + +/** + * Shorthand for printing error messages. + */ +BFS_FORMATTER(2, 3) +void bfs_error(const struct bfs_ctx *ctx, const char *format, ...); + +/** + * Shorthand for printing warning messages. + * + * @return Whether a warning was printed. + */ +BFS_FORMATTER(2, 3) +bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...); + +/** + * Shorthand for printing debug messages. + * + * @return Whether a debug message was printed. + */ +BFS_FORMATTER(3, 4) +bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...); + +/** + * bfs_error() variant that takes a va_list. + */ +void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args); + +/** + * bfs_warning() variant that takes a va_list. + */ +bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args); + +/** + * bfs_debug() variant that takes a va_list. + */ +bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args); + +/** + * Print the error message prefix. + */ +void bfs_error_prefix(const struct bfs_ctx *ctx); + +/** + * Print the warning message prefix. + */ +bool bfs_warning_prefix(const struct bfs_ctx *ctx); + +/** + * Print the debug message prefix. + */ +bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag); + +/** + * Highlight parts of the command line in an error message. + */ +void bfs_argv_error(const struct bfs_ctx *ctx, const bool *args); + +/** + * Highlight parts of an expression in an error message. + */ +void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr); + +/** + * Highlight parts of the command line in a warning message. + */ +bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool *args); + +/** + * Highlight parts of an expression in a warning message. + */ +bool bfs_expr_warning(const struct bfs_ctx *ctx, const struct bfs_expr *expr); + +#endif // BFS_DIAG_H diff --git a/src/dir.c b/src/dir.c new file mode 100644 index 0000000..024e767 --- /dev/null +++ b/src/dir.c @@ -0,0 +1,303 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2021-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "dir.h" +#include "util.h" +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#if __linux__ +# include <sys/syscall.h> +#endif // __linux__ + +enum bfs_type bfs_mode_to_type(mode_t mode) { + switch (mode & S_IFMT) { +#ifdef S_IFBLK + case S_IFBLK: + return BFS_BLK; +#endif +#ifdef S_IFCHR + case S_IFCHR: + return BFS_CHR; +#endif +#ifdef S_IFDIR + case S_IFDIR: + return BFS_DIR; +#endif +#ifdef S_IFDOOR + case S_IFDOOR: + return BFS_DOOR; +#endif +#ifdef S_IFIFO + case S_IFIFO: + return BFS_FIFO; +#endif +#ifdef S_IFLNK + case S_IFLNK: + return BFS_LNK; +#endif +#ifdef S_IFPORT + case S_IFPORT: + return BFS_PORT; +#endif +#ifdef S_IFREG + case S_IFREG: + return BFS_REG; +#endif +#ifdef S_IFSOCK + case S_IFSOCK: + return BFS_SOCK; +#endif +#ifdef S_IFWHT + case S_IFWHT: + return BFS_WHT; +#endif + + default: + return BFS_UNKNOWN; + } +} + +#if __linux__ +/** + * This is not defined in the kernel headers for some reason, callers have to + * define it themselves. + */ +struct linux_dirent64 { + ino64_t d_ino; + off64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[]; +}; + +// Make the whole allocation 64k +#define BUF_SIZE ((64 << 10) - 8) +#endif + +struct bfs_dir { +#if __linux__ + int fd; + unsigned short pos; + unsigned short size; +#else + DIR *dir; + struct dirent *de; +#endif +}; + +struct bfs_dir *bfs_opendir(int at_fd, const char *at_path) { +#if __linux__ + struct bfs_dir *dir = malloc(sizeof(*dir) + BUF_SIZE); +#else + struct bfs_dir *dir = malloc(sizeof(*dir)); +#endif + if (!dir) { + return NULL; + } + + int fd; + if (at_path) { + fd = openat(at_fd, at_path, O_RDONLY | O_CLOEXEC | O_DIRECTORY); + } else if (at_fd >= 0) { + fd = at_fd; + } else { + free(dir); + errno = EBADF; + return NULL; + } + + if (fd < 0) { + free(dir); + return NULL; + } + +#if __linux__ + dir->fd = fd; + dir->pos = 0; + dir->size = 0; +#else + dir->dir = fdopendir(fd); + if (!dir->dir) { + if (at_path) { + close_quietly(fd); + } + free(dir); + return NULL; + } + + dir->de = NULL; +#endif // __linux__ + + return dir; +} + +int bfs_dirfd(const struct bfs_dir *dir) { +#if __linux__ + return dir->fd; +#else + return dirfd(dir->dir); +#endif +} + +/** Convert a dirent type to a bfs_type. */ +static enum bfs_type translate_type(int d_type) { + switch (d_type) { +#ifdef DT_BLK + case DT_BLK: + return BFS_BLK; +#endif +#ifdef DT_CHR + case DT_CHR: + return BFS_CHR; +#endif +#ifdef DT_DIR + case DT_DIR: + return BFS_DIR; +#endif +#ifdef DT_DOOR + case DT_DOOR: + return BFS_DOOR; +#endif +#ifdef DT_FIFO + case DT_FIFO: + return BFS_FIFO; +#endif +#ifdef DT_LNK + case DT_LNK: + return BFS_LNK; +#endif +#ifdef DT_PORT + case DT_PORT: + return BFS_PORT; +#endif +#ifdef DT_REG + case DT_REG: + return BFS_REG; +#endif +#ifdef DT_SOCK + case DT_SOCK: + return BFS_SOCK; +#endif +#ifdef DT_WHT + case DT_WHT: + return BFS_WHT; +#endif + } + + return BFS_UNKNOWN; +} + +#if !__linux__ +/** Get the type from a struct dirent if it exists, and convert it. */ +static enum bfs_type dirent_type(const struct dirent *de) { +#if defined(_DIRENT_HAVE_D_TYPE) || defined(DT_UNKNOWN) + return translate_type(de->d_type); +#else + return BFS_UNKNOWN; +#endif +} +#endif + +/** Check if a name is . or .. */ +static bool is_dot(const char *name) { + return name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); +} + +int bfs_readdir(struct bfs_dir *dir, struct bfs_dirent *de) { + while (true) { +#if __linux__ + char *buf = (char *)(dir + 1); + + if (dir->pos >= dir->size) { +#if BFS_HAS_FEATURE(memory_sanitizer, false) + // Make sure msan knows the buffer is initialized + memset(buf, 0, BUF_SIZE); +#endif + + ssize_t size = syscall(__NR_getdents64, dir->fd, buf, BUF_SIZE); + if (size <= 0) { + return size; + } + dir->pos = 0; + dir->size = size; + } + + const struct linux_dirent64 *lde = (void *)(buf + dir->pos); + dir->pos += lde->d_reclen; + + if (is_dot(lde->d_name)) { + continue; + } + + if (de) { + de->type = translate_type(lde->d_type); + de->name = lde->d_name; + } + + return 1; +#else // !__linux__ + errno = 0; + dir->de = readdir(dir->dir); + if (dir->de) { + if (is_dot(dir->de->d_name)) { + continue; + } + if (de) { + de->type = dirent_type(dir->de); + de->name = dir->de->d_name; + } + return 1; + } else if (errno != 0) { + return -1; + } else { + return 0; + } +#endif // !__linux__ + } +} + +int bfs_closedir(struct bfs_dir *dir) { +#if __linux__ + int ret = xclose(dir->fd); +#else + int ret = closedir(dir->dir); +#endif + free(dir); + return ret; +} + +int bfs_freedir(struct bfs_dir *dir) { +#if __linux__ + int ret = dir->fd; + free(dir); + return ret; +#elif __FreeBSD__ + int ret = fdclosedir(dir->dir); + free(dir); + return ret; +#else + int ret = dup_cloexec(dirfd(dir->dir)); + bfs_closedir(dir); + return ret; +#endif +} diff --git a/src/dir.h b/src/dir.h new file mode 100644 index 0000000..69344c6 --- /dev/null +++ b/src/dir.h @@ -0,0 +1,124 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2021 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Directories and their contents. + */ + +#ifndef BFS_DIR_H +#define BFS_DIR_H + +#include <sys/types.h> + +/** + * A directory. + */ +struct bfs_dir; + +/** + * File types. + */ +enum bfs_type { + /** An error occurred for this file. */ + BFS_ERROR = -1, + /** Unknown type. */ + BFS_UNKNOWN, + /** Block device. */ + BFS_BLK, + /** Character device. */ + BFS_CHR, + /** Directory. */ + BFS_DIR, + /** Solaris door. */ + BFS_DOOR, + /** Pipe. */ + BFS_FIFO, + /** Symbolic link. */ + BFS_LNK, + /** Solaris event port. */ + BFS_PORT, + /** Regular file. */ + BFS_REG, + /** Socket. */ + BFS_SOCK, + /** BSD whiteout. */ + BFS_WHT, +}; + +/** + * Convert a bfs_stat() mode to a bfs_type. + */ +enum bfs_type bfs_mode_to_type(mode_t mode); + +/** + * A directory entry. + */ +struct bfs_dirent { + /** The type of this file (possibly unknown). */ + enum bfs_type type; + /** The name of this file. */ + const char *name; +}; + +/** + * Open a directory. + * + * @param at_fd + * The base directory for path resolution. + * @param at_path + * The path of the directory to open, relative to at_fd. Pass NULL to + * open at_fd itself. + * @return + * The opened directory, or NULL on failure. + */ +struct bfs_dir *bfs_opendir(int at_fd, const char *at_path); + +/** + * Get the file descriptor for a directory. + */ +int bfs_dirfd(const struct bfs_dir *dir); + +/** + * Read a directory entry. + * + * @param dir + * The directory to read. + * @param[out] dirent + * The directory entry to populate. + * @return + * 1 on success, 0 on EOF, or -1 on failure. + */ +int bfs_readdir(struct bfs_dir *dir, struct bfs_dirent *de); + +/** + * Close a directory. + * + * @return + * 0 on success, -1 on failure. + */ +int bfs_closedir(struct bfs_dir *dir); + +/** + * Free a directory, keeping an open file descriptor to it. + * + * @param dir + * The directory to free. + * @return + * The file descriptor on success, or -1 on failure. + */ +int bfs_freedir(struct bfs_dir *dir); + +#endif // BFS_DIR_H diff --git a/src/dstring.c b/src/dstring.c new file mode 100644 index 0000000..f344d09 --- /dev/null +++ b/src/dstring.c @@ -0,0 +1,220 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2016-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "dstring.h" +#include <assert.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/** + * The memory representation of a dynamic string. Users get a pointer to data. + */ +struct dstring { + size_t capacity; + size_t length; + char data[]; +}; + +/** Get the string header from the string data pointer. */ +static struct dstring *dstrheader(const char *dstr) { + return (struct dstring *)(dstr - offsetof(struct dstring, data)); +} + +/** Get the correct size for a dstring with the given capacity. */ +static size_t dstrsize(size_t capacity) { + return BFS_FLEX_SIZEOF(struct dstring, data, capacity + 1); +} + +/** Allocate a dstring with the given contents. */ +static char *dstralloc_impl(size_t capacity, size_t length, const char *data) { + // Avoid reallocations for small strings + if (capacity < 7) { + capacity = 7; + } + + struct dstring *header = malloc(dstrsize(capacity)); + if (!header) { + return NULL; + } + + header->capacity = capacity; + header->length = length; + + memcpy(header->data, data, length); + header->data[length] = '\0'; + return header->data; +} + +char *dstralloc(size_t capacity) { + return dstralloc_impl(capacity, 0, ""); +} + +char *dstrdup(const char *str) { + size_t len = strlen(str); + return dstralloc_impl(len, len, str); +} + +char *dstrndup(const char *str, size_t n) { + size_t len = strnlen(str, n); + return dstralloc_impl(len, len, str); +} + +size_t dstrlen(const char *dstr) { + return dstrheader(dstr)->length; +} + +int dstreserve(char **dstr, size_t capacity) { + struct dstring *header = dstrheader(*dstr); + + if (capacity > header->capacity) { + capacity *= 2; + + header = realloc(header, dstrsize(capacity)); + if (!header) { + return -1; + } + header->capacity = capacity; + + *dstr = header->data; + } + + return 0; +} + +int dstresize(char **dstr, size_t length) { + if (dstreserve(dstr, length) != 0) { + return -1; + } + + struct dstring *header = dstrheader(*dstr); + header->length = length; + header->data[length] = '\0'; + + return 0; +} + +/** Common implementation of dstr{cat,ncat,app}. */ +static int dstrcat_impl(char **dest, const char *src, size_t srclen) { + size_t oldlen = dstrlen(*dest); + size_t newlen = oldlen + srclen; + + if (dstresize(dest, newlen) != 0) { + return -1; + } + + memcpy(*dest + oldlen, src, srclen); + return 0; +} + +int dstrcat(char **dest, const char *src) { + return dstrcat_impl(dest, src, strlen(src)); +} + +int dstrncat(char **dest, const char *src, size_t n) { + return dstrcat_impl(dest, src, strnlen(src, n)); +} + +int dstrdcat(char **dest, const char *src) { + return dstrcat_impl(dest, src, dstrlen(src)); +} + +int dstrapp(char **str, char c) { + return dstrcat_impl(str, &c, 1); +} + +char *dstrprintf(const char *format, ...) { + va_list args; + + va_start(args, format); + char *str = dstrvprintf(format, args); + va_end(args); + + return str; +} + +char *dstrvprintf(const char *format, va_list args) { + // Guess a capacity to try to avoid reallocating + char *str = dstralloc(2*strlen(format)); + if (!str) { + return NULL; + } + + if (dstrvcatf(&str, format, args) != 0) { + dstrfree(str); + return NULL; + } + + return str; +} + +int dstrcatf(char **str, const char *format, ...) { + va_list args; + + va_start(args, format); + int ret = dstrvcatf(str, format, args); + va_end(args); + + return ret; +} + +int dstrvcatf(char **str, const char *format, va_list args) { + // Guess a capacity to try to avoid calling vsnprintf() twice + size_t len = dstrlen(*str); + dstreserve(str, len + 2*strlen(format)); + size_t cap = dstrheader(*str)->capacity; + + va_list copy; + va_copy(copy, args); + + char *tail = *str + len; + int ret = vsnprintf(tail, cap - len + 1, format, args); + if (ret < 0) { + goto fail; + } + + size_t tail_len = ret; + if (tail_len > cap - len) { + cap = len + tail_len; + if (dstreserve(str, cap) != 0) { + goto fail; + } + + tail = *str + len; + ret = vsnprintf(tail, tail_len + 1, format, copy); + if (ret < 0 || (size_t)ret != tail_len) { + assert(!"Length of formatted string changed"); + goto fail; + } + } + + va_end(copy); + + struct dstring *header = dstrheader(*str); + header->length += tail_len; + return 0; + +fail: + *tail = '\0'; + return -1; +} + +void dstrfree(char *dstr) { + if (dstr) { + free(dstrheader(dstr)); + } +} diff --git a/src/dstring.h b/src/dstring.h new file mode 100644 index 0000000..54106f3 --- /dev/null +++ b/src/dstring.h @@ -0,0 +1,194 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2016-2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * A dynamic string library. + */ + +#ifndef BFS_DSTRING_H +#define BFS_DSTRING_H + +#include "util.h" +#include <stdarg.h> +#include <stddef.h> + +/** + * Allocate a dynamic string. + * + * @param capacity + * The initial capacity of the string. + */ +char *dstralloc(size_t capacity); + +/** + * Create a dynamic copy of a string. + * + * @param str + * The NUL-terminated string to copy. + */ +char *dstrdup(const char *str); + +/** + * Create a length-limited dynamic copy of a string. + * + * @param str + * The string to copy. + * @param n + * The maximum number of characters to copy from str. + */ +char *dstrndup(const char *str, size_t n); + +/** + * Get a dynamic string's length. + * + * @param dstr + * The string to measure. + * @return The length of dstr. + */ +size_t dstrlen(const char *dstr); + +/** + * Reserve some capacity in a dynamic string. + * + * @param dstr + * The dynamic string to preallocate. + * @param capacity + * The new capacity for the string. + * @return 0 on success, -1 on failure. + */ +int dstreserve(char **dstr, size_t capacity); + +/** + * Resize a dynamic string. + * + * @param dstr + * The dynamic string to resize. + * @param length + * The new length for the dynamic string. + * @return 0 on success, -1 on failure. + */ +int dstresize(char **dstr, size_t length); + +/** + * Append to a dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The string to append. + * @return 0 on success, -1 on failure. + */ +int dstrcat(char **dest, const char *src); + +/** + * Append to a dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The string to append. + * @param n + * The maximum number of characters to take from src. + * @return 0 on success, -1 on failure. + */ +int dstrncat(char **dest, const char *src, size_t n); + +/** + * Append a dynamic string to another dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The dynamic string to append. + * @return + * 0 on success, -1 on failure. + */ +int dstrdcat(char **dest, const char *src); + +/** + * Append a single character to a dynamic string. + * + * @param str + * The string to append to. + * @param c + * The character to append. + * @return 0 on success, -1 on failure. + */ +int dstrapp(char **str, char c); + +/** + * Create a dynamic string from a format string. + * + * @param format + * The format string to fill in. + * @param ... + * Any arguments for the format string. + * @return + * The created string, or NULL on failure. + */ +BFS_FORMATTER(1, 2) +char *dstrprintf(const char *format, ...); + +/** + * Create a dynamic string from a format string and a va_list. + * + * @param format + * The format string to fill in. + * @param args + * The arguments for the format string. + * @return + * The created string, or NULL on failure. + */ +char *dstrvprintf(const char *format, va_list args); + +/** + * Format some text onto the end of a dynamic string. + * + * @param str + * The destination dynamic string. + * @param format + * The format string to fill in. + * @param ... + * Any arguments for the format string. + * @return + * 0 on success, -1 on failure. + */ +BFS_FORMATTER(2, 3) +int dstrcatf(char **str, const char *format, ...); + +/** + * Format some text from a va_list onto the end of a dynamic string. + * + * @param str + * The destination dynamic string. + * @param format + * The format string to fill in. + * @param args + * The arguments for the format string. + * @return + * 0 on success, -1 on failure. + */ +int dstrvcatf(char **str, const char *format, va_list args); + +/** + * Free a dynamic string. + * + * @param dstr + * The string to free. + */ +void dstrfree(char *dstr); + +#endif // BFS_DSTRING_H diff --git a/src/eval.c b/src/eval.c new file mode 100644 index 0000000..1d0a6f2 --- /dev/null +++ b/src/eval.c @@ -0,0 +1,1644 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Implementation of all the literal expressions. + */ + +#include "eval.h" +#include "bar.h" +#include "bftw.h" +#include "color.h" +#include "ctx.h" +#include "darray.h" +#include "diag.h" +#include "dir.h" +#include "dstring.h" +#include "exec.h" +#include "expr.h" +#include "fsade.h" +#include "mtab.h" +#include "printf.h" +#include "pwcache.h" +#include "stat.h" +#include "trie.h" +#include "util.h" +#include "xregex.h" +#include "xtime.h" +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <grp.h> +#include <pwd.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <time.h> +#include <unistd.h> +#include <wchar.h> + +struct bfs_eval { + /** Data about the current file. */ + const struct BFTW *ftwbuf; + /** The bfs context. */ + const struct bfs_ctx *ctx; + /** The bftw() callback return value. */ + enum bftw_action action; + /** The bfs_eval() return value. */ + int *ret; + /** Whether to quit immediately. */ + bool quit; +}; + +/** + * Print an error message. + */ +BFS_FORMATTER(2, 3) +static void eval_error(struct bfs_eval *state, const char *format, ...) { + // By POSIX, any errors should be accompanied by a non-zero exit status + *state->ret = EXIT_FAILURE; + + int error = errno; + const struct bfs_ctx *ctx = state->ctx; + CFILE *cerr = ctx->cerr; + + bfs_error(ctx, "%pP: ", state->ftwbuf); + + va_list args; + va_start(args, format); + errno = error; + cvfprintf(cerr, format, args); + va_end(args); +} + +/** + * Check if an error should be ignored. + */ +static bool eval_should_ignore(const struct bfs_eval *state, int error) { + return state->ctx->ignore_races + && is_nonexistence_error(error) + && state->ftwbuf->depth > 0; +} + +/** + * Report an error that occurs during evaluation. + */ +static void eval_report_error(struct bfs_eval *state) { + if (!eval_should_ignore(state, errno)) { + eval_error(state, "%m.\n"); + } +} + +/** + * Perform a bfs_stat() call if necessary. + */ +static const struct bfs_stat *eval_stat(struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + const struct bfs_stat *ret = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!ret) { + eval_report_error(state); + } + return ret; +} + +/** + * Get the difference (in seconds) between two struct timespecs. + */ +static time_t timespec_diff(const struct timespec *lhs, const struct timespec *rhs) { + time_t ret = lhs->tv_sec - rhs->tv_sec; + if (lhs->tv_nsec < rhs->tv_nsec) { + --ret; + } + return ret; +} + +bool bfs_expr_cmp(const struct bfs_expr *expr, long long n) { + switch (expr->int_cmp) { + case BFS_INT_EQUAL: + return n == expr->num; + case BFS_INT_LESS: + return n < expr->num; + case BFS_INT_GREATER: + return n > expr->num; + } + + assert(!"Invalid comparison mode"); + return false; +} + +/** + * -true test. + */ +bool eval_true(const struct bfs_expr *expr, struct bfs_eval *state) { + return true; +} + +/** + * -false test. + */ +bool eval_false(const struct bfs_expr *expr, struct bfs_eval *state) { + return false; +} + +/** + * -executable, -readable, -writable tests. + */ +bool eval_access(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + return xfaccessat(ftwbuf->at_fd, ftwbuf->at_path, expr->num) == 0; +} + +/** + * -acl test. + */ +bool eval_acl(const struct bfs_expr *expr, struct bfs_eval *state) { + int ret = bfs_check_acl(state->ftwbuf); + if (ret >= 0) { + return ret; + } else { + eval_report_error(state); + return false; + } +} + +/** + * -capable test. + */ +bool eval_capable(const struct bfs_expr *expr, struct bfs_eval *state) { + int ret = bfs_check_capabilities(state->ftwbuf); + if (ret >= 0) { + return ret; + } else { + eval_report_error(state); + return false; + } +} + +/** + * Get the given timespec field out of a stat buffer. + */ +static const struct timespec *eval_stat_time(const struct bfs_stat *statbuf, enum bfs_stat_field field, struct bfs_eval *state) { + const struct timespec *ret = bfs_stat_time(statbuf, field); + if (!ret) { + eval_error(state, "Couldn't get file %s: %m.\n", bfs_stat_field_name(field)); + } + return ret; +} + +/** + * -[aBcm]?newer tests. + */ +bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct timespec *time = eval_stat_time(statbuf, expr->stat_field, state); + if (!time) { + return false; + } + + return time->tv_sec > expr->reftime.tv_sec + || (time->tv_sec == expr->reftime.tv_sec && time->tv_nsec > expr->reftime.tv_nsec); +} + +/** + * -[aBcm]{min,time} tests. + */ +bool eval_time(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct timespec *time = eval_stat_time(statbuf, expr->stat_field, state); + if (!time) { + return false; + } + + time_t diff = timespec_diff(&expr->reftime, time); + switch (expr->time_unit) { + case BFS_DAYS: + diff /= 60*24; + BFS_FALLTHROUGH; + case BFS_MINUTES: + diff /= 60; + BFS_FALLTHROUGH; + case BFS_SECONDS: + break; + } + + return bfs_expr_cmp(expr, diff); +} + +/** + * -used test. + */ +bool eval_used(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct timespec *atime = eval_stat_time(statbuf, BFS_STAT_ATIME, state); + const struct timespec *ctime = eval_stat_time(statbuf, BFS_STAT_CTIME, state); + if (!atime || !ctime) { + return false; + } + + long long diff = timespec_diff(atime, ctime); + if (diff < 0) { + return false; + } + + long long day_seconds = 60*60*24; + diff = (diff + day_seconds - 1) / day_seconds; + return bfs_expr_cmp(expr, diff); +} + +/** + * -gid test. + */ +bool eval_gid(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return bfs_expr_cmp(expr, statbuf->gid); +} + +/** + * -uid test. + */ +bool eval_uid(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return bfs_expr_cmp(expr, statbuf->uid); +} + +/** + * -nogroup test. + */ +bool eval_nogroup(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct bfs_groups *groups = bfs_ctx_groups(state->ctx); + if (!groups) { + eval_report_error(state); + return false; + } + + return bfs_getgrgid(groups, statbuf->gid) == NULL; +} + +/** + * -nouser test. + */ +bool eval_nouser(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct bfs_users *users = bfs_ctx_users(state->ctx); + if (!users) { + eval_report_error(state); + return false; + } + + return bfs_getpwuid(users, statbuf->uid) == NULL; +} + +/** + * -delete action. + */ +bool eval_delete(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + + // Don't try to delete the current directory + if (strcmp(ftwbuf->path, ".") == 0) { + return true; + } + + int flag = 0; + + // We need to know the actual type of the path, not what it points to + enum bfs_type type = bftw_type(ftwbuf, BFS_STAT_NOFOLLOW); + if (type == BFS_DIR) { + flag |= AT_REMOVEDIR; + } else if (type == BFS_ERROR) { + eval_report_error(state); + return false; + } + + if (unlinkat(ftwbuf->at_fd, ftwbuf->at_path, flag) != 0) { + eval_report_error(state); + return false; + } + + return true; +} + +/** Finish any pending -exec ... + operations. */ +static int eval_exec_finish(const struct bfs_expr *expr, const struct bfs_ctx *ctx) { + int ret = 0; + + if (expr->eval_fn == eval_exec) { + if (bfs_exec_finish(expr->exec) != 0) { + if (errno != 0) { + bfs_error(ctx, "%s %s: %m.\n", expr->argv[0], expr->argv[1]); + } + ret = -1; + } + } else if (bfs_expr_has_children(expr)) { + if (expr->lhs && eval_exec_finish(expr->lhs, ctx) != 0) { + ret = -1; + } + if (expr->rhs && eval_exec_finish(expr->rhs, ctx) != 0) { + ret = -1; + } + } + + return ret; +} + +/** + * -exec[dir]/-ok[dir] actions. + */ +bool eval_exec(const struct bfs_expr *expr, struct bfs_eval *state) { + bool ret = bfs_exec(expr->exec, state->ftwbuf) == 0; + if (errno != 0) { + eval_error(state, "%s %s: %m.\n", expr->argv[0], expr->argv[1]); + } + return ret; +} + +/** + * -exit action. + */ +bool eval_exit(const struct bfs_expr *expr, struct bfs_eval *state) { + state->action = BFTW_STOP; + *state->ret = expr->num; + state->quit = true; + return true; +} + +/** + * -depth N test. + */ +bool eval_depth(const struct bfs_expr *expr, struct bfs_eval *state) { + return bfs_expr_cmp(expr, state->ftwbuf->depth); +} + +/** + * -empty test. + */ +bool eval_empty(const struct bfs_expr *expr, struct bfs_eval *state) { + bool ret = false; + const struct BFTW *ftwbuf = state->ftwbuf; + + if (ftwbuf->type == BFS_DIR) { + struct bfs_dir *dir = bfs_opendir(ftwbuf->at_fd, ftwbuf->at_path); + if (!dir) { + eval_report_error(state); + goto done; + } + + int did_read = bfs_readdir(dir, NULL); + if (did_read < 0) { + eval_report_error(state); + } else { + ret = !did_read; + } + + bfs_closedir(dir); + } else if (ftwbuf->type == BFS_REG) { + const struct bfs_stat *statbuf = eval_stat(state); + if (statbuf) { + ret = statbuf->size == 0; + } + } + +done: + return ret; +} + +/** + * -flags test. + */ +bool eval_flags(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + if (!(statbuf->mask & BFS_STAT_ATTRS)) { + eval_error(state, "Couldn't get file %s.\n", bfs_stat_field_name(BFS_STAT_ATTRS)); + return false; + } + + unsigned long flags = statbuf->attrs; + unsigned long set = expr->set_flags; + unsigned long clear = expr->clear_flags; + + switch (expr->flags_cmp) { + case BFS_MODE_EQUAL: + return flags == set && !(flags & clear); + + case BFS_MODE_ALL: + return (flags & set) == set && !(flags & clear); + + case BFS_MODE_ANY: + return (flags & set) || (flags & clear) != clear; + } + + assert(!"Invalid comparison mode"); + return false; +} + +/** + * -fstype test. + */ +bool eval_fstype(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct bfs_mtab *mtab = bfs_ctx_mtab(state->ctx); + if (!mtab) { + eval_report_error(state); + return false; + } + + const char *type = bfs_fstype(mtab, statbuf); + return strcmp(type, expr->argv[1]) == 0; +} + +/** + * -hidden test. + */ +bool eval_hidden(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + const char *name = ftwbuf->path + ftwbuf->nameoff; + + // Don't treat "." or ".." as hidden directories. Otherwise we'd filter + // out everything when given + // + // $ bfs . -nohidden + // $ bfs .. -nohidden + return name[0] == '.' && strcmp(name, ".") != 0 && strcmp(name, "..") != 0; +} + +/** + * -inum test. + */ +bool eval_inum(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return bfs_expr_cmp(expr, statbuf->ino); +} + +/** + * -links test. + */ +bool eval_links(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return bfs_expr_cmp(expr, statbuf->nlink); +} + +/** + * -i?lname test. + */ +bool eval_lname(const struct bfs_expr *expr, struct bfs_eval *state) { + bool ret = false; + char *name = NULL; + + const struct BFTW *ftwbuf = state->ftwbuf; + if (ftwbuf->type != BFS_LNK) { + goto done; + } + + const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW); + size_t len = statbuf ? statbuf->size : 0; + + name = xreadlinkat(ftwbuf->at_fd, ftwbuf->at_path, len); + if (!name) { + eval_report_error(state); + goto done; + } + + ret = fnmatch(expr->argv[1], name, expr->num) == 0; + +done: + free(name); + return ret; +} + +/** + * -i?name test. + */ +bool eval_name(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + + const char *name = ftwbuf->path + ftwbuf->nameoff; + char *copy = NULL; + if (ftwbuf->depth == 0) { + // Any trailing slashes are not part of the name. This can only + // happen for the root path. + const char *slash = strchr(name, '/'); + if (slash && slash > name) { + copy = strndup(name, slash - name); + if (!copy) { + eval_report_error(state); + return false; + } + name = copy; + } + } + + bool ret = fnmatch(expr->argv[1], name, expr->num) == 0; + free(copy); + return ret; +} + +/** + * -i?path test. + */ +bool eval_path(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + return fnmatch(expr->argv[1], ftwbuf->path, expr->num) == 0; +} + +/** + * -perm test. + */ +bool eval_perm(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + mode_t mode = statbuf->mode; + mode_t target; + if (state->ftwbuf->type == BFS_DIR) { + target = expr->dir_mode; + } else { + target = expr->file_mode; + } + + switch (expr->mode_cmp) { + case BFS_MODE_EQUAL: + return (mode & 07777) == target; + + case BFS_MODE_ALL: + return (mode & target) == target; + + case BFS_MODE_ANY: + return !(mode & target) == !target; + } + + assert(!"Invalid comparison mode"); + return false; +} + +/** + * -f?ls action. + */ +bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state) { + CFILE *cfile = expr->cfile; + FILE *file = cfile->file; + const struct bfs_users *users = bfs_ctx_users(state->ctx); + const struct bfs_groups *groups = bfs_ctx_groups(state->ctx); + const struct BFTW *ftwbuf = state->ftwbuf; + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + goto done; + } + + uintmax_t ino = statbuf->ino; + uintmax_t block_size = state->ctx->posixly_correct ? 512 : 1024; + uintmax_t blocks = ((uintmax_t)statbuf->blocks*BFS_STAT_BLKSIZE + block_size - 1)/block_size; + char mode[11]; + xstrmode(statbuf->mode, mode); + char acl = bfs_check_acl(ftwbuf) > 0 ? '+' : ' '; + uintmax_t nlink = statbuf->nlink; + if (fprintf(file, "%9ju %6ju %s%c %2ju", ino, blocks, mode, acl, nlink) < 0) { + goto error; + } + + uintmax_t uid = statbuf->uid; + const struct passwd *pwd = users ? bfs_getpwuid(users, uid) : NULL; + if (pwd) { + if (fprintf(file, " %-8s", pwd->pw_name) < 0) { + goto error; + } + } else { + if (fprintf(file, " %-8ju", uid) < 0) { + goto error; + } + } + + uintmax_t gid = statbuf->gid; + const struct group *grp = groups ? bfs_getgrgid(groups, gid) : NULL; + if (grp) { + if (fprintf(file, " %-8s", grp->gr_name) < 0) { + goto error; + } + } else { + if (fprintf(file, " %-8ju", gid) < 0) { + goto error; + } + } + + if (ftwbuf->type == BFS_BLK || ftwbuf->type == BFS_CHR) { + int ma = bfs_major(statbuf->rdev); + int mi = bfs_minor(statbuf->rdev); + if (fprintf(file, " %3d, %3d", ma, mi) < 0) { + goto error; + } + } else { + uintmax_t size = statbuf->size; + if (fprintf(file, " %8ju", size) < 0) { + goto error; + } + } + + time_t time = statbuf->mtime.tv_sec; + time_t now = expr->reftime.tv_sec; + time_t six_months_ago = now - 6*30*24*60*60; + time_t tomorrow = now + 24*60*60; + struct tm tm; + if (xlocaltime(&time, &tm) != 0) { + goto error; + } + char time_str[256]; + const char *time_format = "%b %e %H:%M"; + if (time <= six_months_ago || time >= tomorrow) { + time_format = "%b %e %Y"; + } + if (!strftime(time_str, sizeof(time_str), time_format, &tm)) { + errno = EOVERFLOW; + goto error; + } + if (fprintf(file, " %s", time_str) < 0) { + goto error; + } + + if (cfprintf(cfile, " %pP", ftwbuf) < 0) { + goto error; + } + + if (ftwbuf->type == BFS_LNK) { + if (cfprintf(cfile, " -> %pL", ftwbuf) < 0) { + goto error; + } + } + + if (fputc('\n', file) == EOF) { + goto error; + } + +done: + return true; + +error: + eval_report_error(state); + return true; +} + +/** + * -f?print action. + */ +bool eval_fprint(const struct bfs_expr *expr, struct bfs_eval *state) { + if (cfprintf(expr->cfile, "%pP\n", state->ftwbuf) < 0) { + eval_report_error(state); + } + return true; +} + +/** + * -f?print0 action. + */ +bool eval_fprint0(const struct bfs_expr *expr, struct bfs_eval *state) { + const char *path = state->ftwbuf->path; + size_t length = strlen(path) + 1; + if (fwrite(path, 1, length, expr->cfile->file) != length) { + eval_report_error(state); + } + return true; +} + +/** + * -f?printf action. + */ +bool eval_fprintf(const struct bfs_expr *expr, struct bfs_eval *state) { + if (bfs_printf(expr->cfile, expr->printf, state->ftwbuf) != 0) { + eval_report_error(state); + } + + return true; +} + +/** + * -printx action. + */ +bool eval_fprintx(const struct bfs_expr *expr, struct bfs_eval *state) { + FILE *file = expr->cfile->file; + const char *path = state->ftwbuf->path; + + while (true) { + size_t span = strcspn(path, " \t\n\\$'\"`"); + if (fwrite(path, 1, span, file) != span) { + goto error; + } + path += span; + + char c = path[0]; + if (!c) { + break; + } + + char escaped[] = {'\\', c}; + if (fwrite(escaped, 1, sizeof(escaped), file) != sizeof(escaped)) { + goto error; + } + ++path; + } + + + if (fputc('\n', file) == EOF) { + goto error; + } + + return true; + +error: + eval_report_error(state); + return true; +} + +/** + * -prune action. + */ +bool eval_prune(const struct bfs_expr *expr, struct bfs_eval *state) { + state->action = BFTW_PRUNE; + return true; +} + +/** + * -quit action. + */ +bool eval_quit(const struct bfs_expr *expr, struct bfs_eval *state) { + state->action = BFTW_STOP; + state->quit = true; + return true; +} + +/** + * -i?regex test. + */ +bool eval_regex(const struct bfs_expr *expr, struct bfs_eval *state) { + const char *path = state->ftwbuf->path; + + int ret = bfs_regexec(expr->regex, path, BFS_REGEX_ANCHOR); + if (ret < 0) { + char *str = bfs_regerror(expr->regex); + if (str) { + eval_error(state, "%s.\n", str); + free(str); + } else { + eval_error(state, "bfs_regerror(): %m.\n"); + } + } + + return ret > 0; +} + +/** + * -samefile test. + */ +bool eval_samefile(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return statbuf->dev == expr->dev && statbuf->ino == expr->ino; +} + +/** + * -size test. + */ +bool eval_size(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + static const off_t scales[] = { + [BFS_BLOCKS] = 512, + [BFS_BYTES] = 1, + [BFS_WORDS] = 2, + [BFS_KB] = 1LL << 10, + [BFS_MB] = 1LL << 20, + [BFS_GB] = 1LL << 30, + [BFS_TB] = 1LL << 40, + [BFS_PB] = 1LL << 50, + }; + + off_t scale = scales[expr->size_unit]; + off_t size = (statbuf->size + scale - 1)/scale; // Round up + return bfs_expr_cmp(expr, size); +} + +/** + * -sparse test. + */ +bool eval_sparse(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + blkcnt_t expected = (statbuf->size + BFS_STAT_BLKSIZE - 1)/BFS_STAT_BLKSIZE; + return statbuf->blocks < expected; +} + +/** + * -type test. + */ +bool eval_type(const struct bfs_expr *expr, struct bfs_eval *state) { + return (1 << state->ftwbuf->type) & expr->num; +} + +/** + * -xattr test. + */ +bool eval_xattr(const struct bfs_expr *expr, struct bfs_eval *state) { + int ret = bfs_check_xattrs(state->ftwbuf); + if (ret >= 0) { + return ret; + } else { + eval_report_error(state); + return false; + } +} + +/** + * -xattrname test. + */ +bool eval_xattrname(const struct bfs_expr *expr, struct bfs_eval *state) { + int ret = bfs_check_xattr_named(state->ftwbuf, expr->argv[1]); + if (ret >= 0) { + return ret; + } else { + eval_report_error(state); + return false; + } +} + +/** + * -xtype test. + */ +bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + enum bfs_stat_flags flags = ftwbuf->stat_flags ^ (BFS_STAT_NOFOLLOW | BFS_STAT_TRYFOLLOW); + enum bfs_type type = bftw_type(ftwbuf, flags); + if (type == BFS_ERROR) { + eval_report_error(state); + return false; + } else { + return (1 << type) & expr->num; + } +} + +#if _POSIX_MONOTONIC_CLOCK > 0 +# define BFS_CLOCK CLOCK_MONOTONIC +#elif _POSIX_TIMERS > 0 +# define BFS_CLOCK CLOCK_REALTIME +#endif + +/** + * Call clock_gettime(), if available. + */ +static int eval_gettime(struct bfs_eval *state, struct timespec *ts) { +#ifdef BFS_CLOCK + int ret = clock_gettime(BFS_CLOCK, ts); + if (ret != 0) { + bfs_warning(state->ctx, "%pP: clock_gettime(): %m.\n", state->ftwbuf); + } + return ret; +#else + return -1; +#endif +} + +/** + * Record an elapsed time. + */ +static void timespec_elapsed(struct timespec *elapsed, const struct timespec *start, const struct timespec *end) { + elapsed->tv_sec += end->tv_sec - start->tv_sec; + elapsed->tv_nsec += end->tv_nsec - start->tv_nsec; + if (elapsed->tv_nsec < 0) { + elapsed->tv_nsec += 1000000000L; + --elapsed->tv_sec; + } else if (elapsed->tv_nsec >= 1000000000L) { + elapsed->tv_nsec -= 1000000000L; + ++elapsed->tv_sec; + } +} + +/** + * Evaluate an expression. + */ +static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) { + struct timespec start, end; + bool time = state->ctx->debug & DEBUG_RATES; + if (time) { + if (eval_gettime(state, &start) != 0) { + time = false; + } + } + + assert(!state->quit); + + bool ret = expr->eval_fn(expr, state); + + if (time) { + if (eval_gettime(state, &end) == 0) { + timespec_elapsed(&expr->elapsed, &start, &end); + } + } + + ++expr->evaluations; + if (ret) { + ++expr->successes; + } + + if (bfs_expr_never_returns(expr)) { + assert(state->quit); + } else if (!state->quit) { + assert(!expr->always_true || ret); + assert(!expr->always_false || !ret); + } + + return ret; +} + +/** + * Evaluate a negation. + */ +bool eval_not(const struct bfs_expr *expr, struct bfs_eval *state) { + return !eval_expr(expr->rhs, state); +} + +/** + * Evaluate a conjunction. + */ +bool eval_and(const struct bfs_expr *expr, struct bfs_eval *state) { + if (!eval_expr(expr->lhs, state)) { + return false; + } + + if (state->quit) { + return false; + } + + return eval_expr(expr->rhs, state); +} + +/** + * Evaluate a disjunction. + */ +bool eval_or(const struct bfs_expr *expr, struct bfs_eval *state) { + if (eval_expr(expr->lhs, state)) { + return true; + } + + if (state->quit) { + return false; + } + + return eval_expr(expr->rhs, state); +} + +/** + * Evaluate the comma operator. + */ +bool eval_comma(const struct bfs_expr *expr, struct bfs_eval *state) { + eval_expr(expr->lhs, state); + + if (state->quit) { + return false; + } + + return eval_expr(expr->rhs, state); +} + +/** Update the status bar. */ +static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct timespec *last_status, size_t count) { + struct timespec now; + if (eval_gettime(state, &now) == 0) { + struct timespec elapsed = {0}; + timespec_elapsed(&elapsed, last_status, &now); + + // Update every 0.1s + if (elapsed.tv_sec > 0 || elapsed.tv_nsec >= 100000000L) { + *last_status = now; + } else { + return; + } + } + + size_t width = bfs_bar_width(bar); + if (width < 3) { + return; + } + + const struct BFTW *ftwbuf = state->ftwbuf; + + char *rhs = dstrprintf(" (visited: %zu, depth: %2zu)", count, ftwbuf->depth); + if (!rhs) { + return; + } + + size_t rhslen = dstrlen(rhs); + if (3 + rhslen > width) { + dstresize(&rhs, 0); + rhslen = 0; + } + + char *status = dstralloc(0); + if (!status) { + goto out_rhs; + } + + const char *path = ftwbuf->path; + size_t pathlen = ftwbuf->nameoff; + if (ftwbuf->depth == 0) { + pathlen = strlen(path); + } + + // Try to make sure even wide characters fit in the status bar + size_t pathmax = width - rhslen - 3; + size_t pathwidth = 0; + mbstate_t mb; + memset(&mb, 0, sizeof(mb)); + while (pathlen > 0) { + wchar_t wc; + size_t len = mbrtowc(&wc, path, pathlen, &mb); + int cwidth; + if (len == (size_t)-1) { + // Invalid byte sequence, assume a single-width '?' + len = 1; + cwidth = 1; + memset(&mb, 0, sizeof(mb)); + } else if (len == (size_t)-2) { + // Incomplete byte sequence, assume a single-width '?' + len = pathlen; + cwidth = 1; + } else { + cwidth = wcwidth(wc); + if (cwidth < 0) { + cwidth = 0; + } + } + + if (pathwidth + cwidth > pathmax) { + break; + } + + if (dstrncat(&status, path, len) != 0) { + goto out_rhs; + } + + path += len; + pathlen -= len; + pathwidth += cwidth; + } + + if (dstrcat(&status, "...") != 0) { + goto out_rhs; + } + + while (pathwidth < pathmax) { + if (dstrapp(&status, ' ') != 0) { + goto out_rhs; + } + ++pathwidth; + } + + if (dstrcat(&status, rhs) != 0) { + goto out_rhs; + } + + bfs_bar_update(bar, status); + + dstrfree(status); +out_rhs: + dstrfree(rhs); +} + +/** Check if we've seen a file before. */ +static bool eval_file_unique(struct bfs_eval *state, struct trie *seen) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + bfs_file_id id; + bfs_stat_id(statbuf, &id); + + struct trie_leaf *leaf = trie_insert_mem(seen, id, sizeof(id)); + if (!leaf) { + eval_report_error(state); + return false; + } + + if (leaf->value) { + state->action = BFTW_PRUNE; + return false; + } else { + leaf->value = leaf; + return true; + } +} + +#define DEBUG_FLAG(flags, flag) \ + do { \ + if ((flags & flag) || flags == flag) { \ + fputs(#flag, stderr); \ + flags ^= flag; \ + if (flags) { \ + fputs(" | ", stderr); \ + } \ + } \ + } while (0) + +/** + * Log a stat() call. + */ +static void debug_stat(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf, const struct bftw_stat *cache, enum bfs_stat_flags flags) { + bfs_debug_prefix(ctx, DEBUG_STAT); + + fprintf(stderr, "bfs_stat("); + if (ftwbuf->at_fd == AT_FDCWD) { + fprintf(stderr, "AT_FDCWD"); + } else { + size_t baselen = strlen(ftwbuf->path) - strlen(ftwbuf->at_path); + fprintf(stderr, "\""); + fwrite(ftwbuf->path, 1, baselen, stderr); + fprintf(stderr, "\""); + } + + fprintf(stderr, ", \"%s\", ", ftwbuf->at_path); + + DEBUG_FLAG(flags, BFS_STAT_FOLLOW); + DEBUG_FLAG(flags, BFS_STAT_NOFOLLOW); + DEBUG_FLAG(flags, BFS_STAT_TRYFOLLOW); + + fprintf(stderr, ") == %d", cache->buf ? 0 : -1); + + if (cache->error) { + fprintf(stderr, " [%d]", cache->error); + } + + fprintf(stderr, "\n"); +} + +/** + * Log any stat() calls that happened. + */ +static void debug_stats(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf) { + if (!(ctx->debug & DEBUG_STAT)) { + return; + } + + const struct bfs_stat *statbuf = ftwbuf->stat_cache.buf; + if (statbuf || ftwbuf->stat_cache.error) { + debug_stat(ctx, ftwbuf, &ftwbuf->stat_cache, BFS_STAT_FOLLOW); + } + + const struct bfs_stat *lstatbuf = ftwbuf->lstat_cache.buf; + if ((lstatbuf && lstatbuf != statbuf) || ftwbuf->lstat_cache.error) { + debug_stat(ctx, ftwbuf, &ftwbuf->lstat_cache, BFS_STAT_NOFOLLOW); + } +} + +#define DUMP_MAP(value) [value] = #value + +/** + * Dump the bfs_type for -D search. + */ +static const char *dump_bfs_type(enum bfs_type type) { + static const char *types[] = { + DUMP_MAP(BFS_UNKNOWN), + DUMP_MAP(BFS_BLK), + DUMP_MAP(BFS_CHR), + DUMP_MAP(BFS_DIR), + DUMP_MAP(BFS_DOOR), + DUMP_MAP(BFS_FIFO), + DUMP_MAP(BFS_LNK), + DUMP_MAP(BFS_PORT), + DUMP_MAP(BFS_REG), + DUMP_MAP(BFS_SOCK), + DUMP_MAP(BFS_WHT), + }; + + if (type == BFS_ERROR) { + return "BFS_ERROR"; + } else { + return types[type]; + } +} + +/** + * Dump the bftw_visit for -D search. + */ +static const char *dump_bftw_visit(enum bftw_visit visit) { + static const char *visits[] = { + DUMP_MAP(BFTW_PRE), + DUMP_MAP(BFTW_POST), + }; + return visits[visit]; +} + +/** + * Dump the bftw_action for -D search. + */ +static const char *dump_bftw_action(enum bftw_action action) { + static const char *actions[] = { + DUMP_MAP(BFTW_CONTINUE), + DUMP_MAP(BFTW_PRUNE), + DUMP_MAP(BFTW_STOP), + }; + return actions[action]; +} + +/** + * Type passed as the argument to the bftw() callback. + */ +struct callback_args { + /** The bfs context. */ + const struct bfs_ctx *ctx; + + /** The status bar. */ + struct bfs_bar *bar; + /** The time of the last status update. */ + struct timespec last_status; + /** The number of files visited so far. */ + size_t count; + + /** The set of seen files. */ + struct trie *seen; + + /** Eventual return value from bfs_eval(). */ + int ret; +}; + +/** + * bftw() callback. + */ +static enum bftw_action eval_callback(const struct BFTW *ftwbuf, void *ptr) { + struct callback_args *args = ptr; + ++args->count; + + const struct bfs_ctx *ctx = args->ctx; + + struct bfs_eval state; + state.ftwbuf = ftwbuf; + state.ctx = ctx; + state.action = BFTW_CONTINUE; + state.ret = &args->ret; + state.quit = false; + + if (args->bar) { + eval_status(&state, args->bar, &args->last_status, args->count); + } + + if (ftwbuf->type == BFS_ERROR) { + if (!eval_should_ignore(&state, ftwbuf->error)) { + eval_error(&state, "%s.\n", strerror(ftwbuf->error)); + } + state.action = BFTW_PRUNE; + goto done; + } + + if (ctx->unique && ftwbuf->visit == BFTW_PRE) { + if (!eval_file_unique(&state, args->seen)) { + goto done; + } + } + + if (eval_expr(ctx->exclude, &state)) { + state.action = BFTW_PRUNE; + goto done; + } + + if (ctx->xargs_safe && strpbrk(ftwbuf->path, " \t\n\'\"\\")) { + eval_error(&state, "Path is not safe for xargs.\n"); + state.action = BFTW_PRUNE; + goto done; + } + + if (ctx->maxdepth < 0 || ftwbuf->depth >= (size_t)ctx->maxdepth) { + state.action = BFTW_PRUNE; + } + + // In -depth mode, only handle directories on the BFTW_POST visit + enum bftw_visit expected_visit = BFTW_PRE; + if ((ctx->flags & BFTW_POST_ORDER) + && (ctx->strategy == BFTW_IDS || ftwbuf->type == BFS_DIR) + && ftwbuf->depth < (size_t)ctx->maxdepth) { + expected_visit = BFTW_POST; + } + + if (ftwbuf->visit == expected_visit + && ftwbuf->depth >= (size_t)ctx->mindepth + && ftwbuf->depth <= (size_t)ctx->maxdepth) { + eval_expr(ctx->expr, &state); + } + +done: + debug_stats(ctx, ftwbuf); + + if (bfs_debug(ctx, DEBUG_SEARCH, "eval_callback({\n")) { + fprintf(stderr, "\t.path = \"%s\",\n", ftwbuf->path); + fprintf(stderr, "\t.root = \"%s\",\n", ftwbuf->root); + fprintf(stderr, "\t.depth = %zu,\n", ftwbuf->depth); + fprintf(stderr, "\t.visit = %s,\n", dump_bftw_visit(ftwbuf->visit)); + fprintf(stderr, "\t.type = %s,\n", dump_bfs_type(ftwbuf->type)); + fprintf(stderr, "\t.error = %d,\n", ftwbuf->error); + fprintf(stderr, "}) == %s\n", dump_bftw_action(state.action)); + } + + return state.action; +} + +/** Check if an rlimit value is infinite. */ +static bool rlim_isinf(rlim_t r) { + // Consider RLIM_{INFINITY,SAVED_{CUR,MAX}} all equally infinite + if (r == RLIM_INFINITY) { + return true; + } + +#ifdef RLIM_SAVED_CUR + if (r == RLIM_SAVED_CUR) { + return true; + } +#endif + +#ifdef RLIM_SAVED_MAX + if (r == RLIM_SAVED_MAX) { + return true; + } +#endif + + return false; +} + +/** Compare two rlimit values, accounting for RLIM_INFINITY etc. */ +static int rlim_cmp(rlim_t a, rlim_t b) { + bool a_inf = rlim_isinf(a); + bool b_inf = rlim_isinf(b); + if (a_inf || b_inf) { + return a_inf - b_inf; + } + + return (a > b) - (a < b); +} + +/** Raise RLIMIT_NOFILE if possible, and return the new limit. */ +static int raise_fdlimit(const struct bfs_ctx *ctx) { + rlim_t target = 64 << 10; + if (rlim_cmp(target, ctx->nofile_hard) > 0) { + target = ctx->nofile_hard; + } + + int ret = target; + + if (rlim_cmp(target, ctx->nofile_soft) > 0) { + const struct rlimit rl = { + .rlim_cur = target, + .rlim_max = ctx->nofile_hard, + }; + if (setrlimit(RLIMIT_NOFILE, &rl) != 0) { + ret = ctx->nofile_soft; + } + } + + return ret; +} + +/** Infer the number of file descriptors available to bftw(). */ +static int infer_fdlimit(const struct bfs_ctx *ctx, int limit) { + // 3 for std{in,out,err} + int nopen = 3 + ctx->nfiles; + + // Check /proc/self/fd for the current number of open fds, if possible + // (we may have inherited more than just the standard ones) + struct bfs_dir *dir = bfs_opendir(AT_FDCWD, "/proc/self/fd"); + if (!dir) { + dir = bfs_opendir(AT_FDCWD, "/dev/fd"); + } + if (dir) { + // Account for 'dir' itself + nopen = -1; + + while (bfs_readdir(dir, NULL) > 0) { + ++nopen; + } + + bfs_closedir(dir); + } + + int ret = limit - nopen; + ret -= ctx->expr->persistent_fds; + ret -= ctx->expr->ephemeral_fds; + + // bftw() needs at least 2 available fds + if (ret < 2) { + ret = 2; + } + + return ret; +} + +/** + * Dump the bftw() flags for -D search. + */ +static void dump_bftw_flags(enum bftw_flags flags) { + DEBUG_FLAG(flags, 0); + DEBUG_FLAG(flags, BFTW_STAT); + DEBUG_FLAG(flags, BFTW_RECOVER); + DEBUG_FLAG(flags, BFTW_POST_ORDER); + DEBUG_FLAG(flags, BFTW_FOLLOW_ROOTS); + DEBUG_FLAG(flags, BFTW_FOLLOW_ALL); + DEBUG_FLAG(flags, BFTW_DETECT_CYCLES); + DEBUG_FLAG(flags, BFTW_SKIP_MOUNTS); + DEBUG_FLAG(flags, BFTW_PRUNE_MOUNTS); + DEBUG_FLAG(flags, BFTW_SORT); + DEBUG_FLAG(flags, BFTW_BUFFER); + + assert(!flags); +} + +/** + * Dump the bftw_strategy for -D search. + */ +static const char *dump_bftw_strategy(enum bftw_strategy strategy) { + static const char *strategies[] = { + DUMP_MAP(BFTW_BFS), + DUMP_MAP(BFTW_DFS), + DUMP_MAP(BFTW_IDS), + DUMP_MAP(BFTW_EDS), + }; + return strategies[strategy]; +} + +/** Check if we need to enable BFTW_BUFFER. */ +static bool eval_must_buffer(const struct bfs_expr *expr) { +#if __FreeBSD__ + // FreeBSD doesn't properly handle adding/removing directory entries + // during readdir() on NFS mounts. Work around it by passing BFTW_BUFFER + // whenever we could be mutating the directory ourselves through -delete + // or -exec. We don't attempt to handle concurrent modification by other + // processes, which are racey anyway. + // + // https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=57696 + // https://github.com/tavianator/bfs/issues/67 + + if (expr->eval_fn == eval_delete || expr->eval_fn == eval_exec) { + return true; + } + + if (bfs_expr_has_children(expr)) { + if (expr->lhs && eval_must_buffer(expr->lhs)) { + return true; + } + + if (expr->rhs && eval_must_buffer(expr->rhs)) { + return true; + } + } +#endif // __FreeBSD__ + + return false; +} + +int bfs_eval(const struct bfs_ctx *ctx) { + if (!ctx->expr) { + return EXIT_SUCCESS; + } + + struct callback_args args = { + .ctx = ctx, + .ret = EXIT_SUCCESS, + }; + + if (ctx->status) { + args.bar = bfs_bar_show(); + if (!args.bar) { + bfs_warning(ctx, "Couldn't show status bar: %m.\n\n"); + } + } + + struct trie seen; + if (ctx->unique) { + trie_init(&seen); + args.seen = &seen; + } + + int fdlimit = raise_fdlimit(ctx); + fdlimit = infer_fdlimit(ctx, fdlimit); + + struct bftw_args bftw_args = { + .paths = ctx->paths, + .npaths = darray_length(ctx->paths), + .callback = eval_callback, + .ptr = &args, + .nopenfd = fdlimit, + .flags = ctx->flags, + .strategy = ctx->strategy, + .mtab = bfs_ctx_mtab(ctx), + }; + + if (eval_must_buffer(ctx->expr)) { + bftw_args.flags |= BFTW_BUFFER; + } + + if (bfs_debug(ctx, DEBUG_SEARCH, "bftw({\n")) { + fprintf(stderr, "\t.paths = {\n"); + for (size_t i = 0; i < bftw_args.npaths; ++i) { + fprintf(stderr, "\t\t\"%s\",\n", bftw_args.paths[i]); + } + fprintf(stderr, "\t},\n"); + fprintf(stderr, "\t.npaths = %zu,\n", bftw_args.npaths); + fprintf(stderr, "\t.callback = eval_callback,\n"); + fprintf(stderr, "\t.ptr = &args,\n"); + fprintf(stderr, "\t.nopenfd = %d,\n", bftw_args.nopenfd); + fprintf(stderr, "\t.flags = "); + dump_bftw_flags(bftw_args.flags); + fprintf(stderr, ",\n\t.strategy = %s,\n", dump_bftw_strategy(bftw_args.strategy)); + fprintf(stderr, "\t.mtab = "); + if (bftw_args.mtab) { + fprintf(stderr, "ctx->mtab"); + } else { + fprintf(stderr, "NULL"); + } + fprintf(stderr, ",\n})\n"); + } + + if (bftw(&bftw_args) != 0) { + args.ret = EXIT_FAILURE; + bfs_perror(ctx, "bftw()"); + } + + if (eval_exec_finish(ctx->expr, ctx) != 0) { + args.ret = EXIT_FAILURE; + } + + bfs_ctx_dump(ctx, DEBUG_RATES); + + if (ctx->unique) { + trie_destroy(&seen); + } + + bfs_bar_hide(args.bar); + + return args.ret; +} diff --git a/src/eval.h b/src/eval.h new file mode 100644 index 0000000..a1bbd2f --- /dev/null +++ b/src/eval.h @@ -0,0 +1,113 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * The evaluation functions that implement literal expressions like -name, + * -print, etc. + */ + +#ifndef BFS_EVAL_H +#define BFS_EVAL_H + +#include <stdbool.h> + +struct bfs_ctx; +struct bfs_expr; + +/** + * Ephemeral state for evaluating an expression. + */ +struct bfs_eval; + +/** + * Expression evaluation function. + * + * @param expr + * The current expression. + * @param state + * The current evaluation state. + * @return + * The result of the test. + */ +typedef bool bfs_eval_fn(const struct bfs_expr *expr, struct bfs_eval *state); + +/** + * Evaluate the command line. + * + * @param ctx + * The bfs context to evaluate. + * @return + * EXIT_SUCCESS on success, otherwise on failure. + */ +int bfs_eval(const struct bfs_ctx *ctx); + +// Predicate evaluation functions + +bool eval_true(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_false(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_access(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_acl(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_capable(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_perm(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_xattr(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_xattrname(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_time(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_used(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_gid(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_uid(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_nogroup(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_nouser(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_depth(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_empty(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_flags(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fstype(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_hidden(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_inum(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_links(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_samefile(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_size(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_sparse(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_type(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_lname(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_name(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_path(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_regex(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_delete(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_exec(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_exit(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fprint(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fprint0(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fprintf(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fprintx(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_prune(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_quit(const struct bfs_expr *expr, struct bfs_eval *state); + +// Operator evaluation functions +bool eval_not(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_and(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_or(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_comma(const struct bfs_expr *expr, struct bfs_eval *state); + +#endif // BFS_EVAL_H diff --git a/src/exec.c b/src/exec.c new file mode 100644 index 0000000..0130317 --- /dev/null +++ b/src/exec.c @@ -0,0 +1,715 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2017-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "exec.h" +#include "bftw.h" +#include "ctx.h" +#include "color.h" +#include "diag.h" +#include "dstring.h" +#include "util.h" +#include "xspawn.h" +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/wait.h> +#include <unistd.h> + +/** Print some debugging info. */ +BFS_FORMATTER(2, 3) +static void bfs_exec_debug(const struct bfs_exec *execbuf, const char *format, ...) { + const struct bfs_ctx *ctx = execbuf->ctx; + + if (!bfs_debug(ctx, DEBUG_EXEC, "${blu}")) { + return; + } + + if (execbuf->flags & BFS_EXEC_CONFIRM) { + fputs("-ok", stderr); + } else { + fputs("-exec", stderr); + } + if (execbuf->flags & BFS_EXEC_CHDIR) { + fputs("dir", stderr); + } + cfprintf(ctx->cerr, "${rs}: "); + + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); +} + +/** Determine the size of a single argument, for comparison to arg_max. */ +static size_t bfs_exec_arg_size(const char *arg) { + return sizeof(arg) + strlen(arg) + 1; +} + +/** Even if we can pass a bigger argument list, cap it here. */ +#define BFS_EXEC_ARG_MAX (16 << 20) + +/** Determine the maximum argv size. */ +static size_t bfs_exec_arg_max(const struct bfs_exec *execbuf) { + long arg_max = sysconf(_SC_ARG_MAX); + bfs_exec_debug(execbuf, "ARG_MAX: %ld according to sysconf()\n", arg_max); + if (arg_max < 0) { + arg_max = BFS_EXEC_ARG_MAX; + bfs_exec_debug(execbuf, "ARG_MAX: %ld assumed\n", arg_max); + } + + // We have to share space with the environment variables + extern char **environ; + for (char **envp = environ; *envp; ++envp) { + arg_max -= bfs_exec_arg_size(*envp); + } + // Account for the terminating NULL entry + arg_max -= sizeof(char *); + bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after environment variables\n", arg_max); + + // Account for the fixed arguments + for (size_t i = 0; i < execbuf->tmpl_argc - 1; ++i) { + arg_max -= bfs_exec_arg_size(execbuf->tmpl_argv[i]); + } + // Account for the terminating NULL entry + arg_max -= sizeof(char *); + bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after fixed arguments\n", arg_max); + + // Assume arguments are counted with the granularity of a single page, + // so allow a one page cushion to account for rounding up + long page_size = sysconf(_SC_PAGESIZE); + if (page_size < 4096) { + page_size = 4096; + } + arg_max -= page_size; + bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after page cushion\n", arg_max); + + // POSIX recommends an additional 2048 bytes of headroom + arg_max -= 2048; + bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after headroom\n", arg_max); + + if (arg_max < 0) { + arg_max = 0; + } else if (arg_max > BFS_EXEC_ARG_MAX) { + arg_max = BFS_EXEC_ARG_MAX; + } + + bfs_exec_debug(execbuf, "ARG_MAX: %ld final value\n", arg_max); + return arg_max; +} + +/** Highlight part of the command line as an error. */ +static void bfs_exec_parse_error(const struct bfs_ctx *ctx, const struct bfs_exec *execbuf) { + char **argv = execbuf->tmpl_argv - 1; + size_t argc = execbuf->tmpl_argc + 1; + if (argv[argc]) { + ++argc; + } + + bool args[ctx->argc]; + for (size_t i = 0; i < ctx->argc; ++i) { + args[i] = false; + } + + size_t i = argv - ctx->argv; + for (size_t j = 0; j < argc; ++j) { + args[i + j] = true; + } + + bfs_argv_error(ctx, args); +} + +struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs_exec_flags flags) { + struct bfs_exec *execbuf = malloc(sizeof(*execbuf)); + if (!execbuf) { + bfs_perror(ctx, "malloc()"); + goto fail; + } + + execbuf->flags = flags; + execbuf->ctx = ctx; + execbuf->tmpl_argv = argv + 1; + execbuf->tmpl_argc = 0; + execbuf->argv = NULL; + execbuf->argc = 0; + execbuf->argv_cap = 0; + execbuf->arg_size = 0; + execbuf->arg_max = 0; + execbuf->arg_min = 0; + execbuf->wd_fd = -1; + execbuf->wd_path = NULL; + execbuf->wd_len = 0; + execbuf->ret = 0; + + while (true) { + const char *arg = execbuf->tmpl_argv[execbuf->tmpl_argc]; + if (!arg) { + if (execbuf->flags & BFS_EXEC_CONFIRM) { + bfs_exec_parse_error(ctx, execbuf); + bfs_error(ctx, "Expected '... ;'.\n"); + } else { + bfs_exec_parse_error(ctx, execbuf); + bfs_error(ctx, "Expected '... ;' or '... {} +'.\n"); + } + goto fail; + } else if (strcmp(arg, ";") == 0) { + break; + } else if (strcmp(arg, "+") == 0) { + const char *prev = execbuf->tmpl_argv[execbuf->tmpl_argc - 1]; + if (!(execbuf->flags & BFS_EXEC_CONFIRM) && strcmp(prev, "{}") == 0) { + execbuf->flags |= BFS_EXEC_MULTI; + break; + } + } + + ++execbuf->tmpl_argc; + } + + if (execbuf->tmpl_argc == 0) { + bfs_exec_parse_error(ctx, execbuf); + bfs_error(ctx, "Missing command.\n"); + goto fail; + } + + execbuf->argv_cap = execbuf->tmpl_argc + 1; + execbuf->argv = malloc(execbuf->argv_cap*sizeof(*execbuf->argv)); + if (!execbuf->argv) { + bfs_perror(ctx, "malloc()"); + goto fail; + } + + if (execbuf->flags & BFS_EXEC_MULTI) { + for (size_t i = 0; i < execbuf->tmpl_argc - 1; ++i) { + char *arg = execbuf->tmpl_argv[i]; + if (strstr(arg, "{}")) { + bfs_exec_parse_error(ctx, execbuf); + bfs_error(ctx, "Only one '{}' is supported.\n"); + goto fail; + } + execbuf->argv[i] = arg; + } + execbuf->argc = execbuf->tmpl_argc - 1; + + execbuf->arg_max = bfs_exec_arg_max(execbuf); + execbuf->arg_min = execbuf->arg_max; + } + + return execbuf; + +fail: + bfs_exec_free(execbuf); + return NULL; +} + +/** Format the current path for use as a command line argument. */ +static char *bfs_exec_format_path(const struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + if (!(execbuf->flags & BFS_EXEC_CHDIR)) { + return strdup(ftwbuf->path); + } + + const char *name = ftwbuf->path + ftwbuf->nameoff; + + if (name[0] == '/') { + // Must be a root path ("/", "//", etc.) + return strdup(name); + } + + // For compatibility with GNU find, use './name' instead of just 'name' + char *path = malloc(2 + strlen(name) + 1); + if (!path) { + return NULL; + } + + strcpy(path, "./"); + strcpy(path + 2, name); + + return path; +} + +/** Format an argument, expanding "{}" to the current path. */ +static char *bfs_exec_format_arg(char *arg, const char *path) { + char *match = strstr(arg, "{}"); + if (!match) { + return arg; + } + + char *ret = dstralloc(0); + if (!ret) { + return NULL; + } + + char *last = arg; + do { + if (dstrncat(&ret, last, match - last) != 0) { + goto err; + } + if (dstrcat(&ret, path) != 0) { + goto err; + } + + last = match + 2; + match = strstr(last, "{}"); + } while (match); + + if (dstrcat(&ret, last) != 0) { + goto err; + } + + return ret; + +err: + dstrfree(ret); + return NULL; +} + +/** Free a formatted argument. */ +static void bfs_exec_free_arg(char *arg, const char *tmpl) { + if (arg != tmpl) { + dstrfree(arg); + } +} + +/** Open a file to use as the working directory. */ +static int bfs_exec_openwd(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + assert(execbuf->wd_fd < 0); + assert(!execbuf->wd_path); + + if (ftwbuf->at_fd != AT_FDCWD) { + // Rely on at_fd being the immediate parent + assert(ftwbuf->at_path == xbasename(ftwbuf->at_path)); + + execbuf->wd_fd = ftwbuf->at_fd; + if (!(execbuf->flags & BFS_EXEC_MULTI)) { + return 0; + } + + execbuf->wd_fd = dup_cloexec(execbuf->wd_fd); + if (execbuf->wd_fd < 0) { + return -1; + } + } + + execbuf->wd_len = ftwbuf->nameoff; + if (execbuf->wd_len == 0) { + if (ftwbuf->path[0] == '/') { + ++execbuf->wd_len; + } else { + // The path is something like "foo", so we're already in the right directory + return 0; + } + } + + execbuf->wd_path = strndup(ftwbuf->path, execbuf->wd_len); + if (!execbuf->wd_path) { + return -1; + } + + if (execbuf->wd_fd < 0) { + execbuf->wd_fd = open(execbuf->wd_path, O_RDONLY | O_CLOEXEC | O_DIRECTORY); + } + + if (execbuf->wd_fd < 0) { + return -1; + } + + return 0; +} + +/** Close the working directory. */ +static void bfs_exec_closewd(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + if (execbuf->wd_fd >= 0) { + if (!ftwbuf || execbuf->wd_fd != ftwbuf->at_fd) { + xclose(execbuf->wd_fd); + } + execbuf->wd_fd = -1; + } + + if (execbuf->wd_path) { + free(execbuf->wd_path); + execbuf->wd_path = NULL; + execbuf->wd_len = 0; + } +} + +/** Actually spawn the process. */ +static int bfs_exec_spawn(const struct bfs_exec *execbuf) { + if (execbuf->flags & BFS_EXEC_CONFIRM) { + for (size_t i = 0; i < execbuf->argc; ++i) { + if (fprintf(stderr, "%s ", execbuf->argv[i]) < 0) { + return -1; + } + } + if (fprintf(stderr, "? ") < 0) { + return -1; + } + + if (ynprompt() <= 0) { + errno = 0; + return -1; + } + } + + // Flush cached state for consistency with the external process + bfs_ctx_flush(execbuf->ctx); + + if (execbuf->flags & BFS_EXEC_MULTI) { + bfs_exec_debug(execbuf, "Executing '%s' ... [%zu arguments] (size %zu)\n", + execbuf->argv[0], execbuf->argc - 1, execbuf->arg_size); + } else { + bfs_exec_debug(execbuf, "Executing '%s' ... [%zu arguments]\n", execbuf->argv[0], execbuf->argc - 1); + } + + pid_t pid = -1; + int error; + + struct bfs_spawn ctx; + if (bfs_spawn_init(&ctx) != 0) { + return -1; + } + + if (bfs_spawn_setflags(&ctx, BFS_SPAWN_USEPATH) != 0) { + goto fail; + } + + // Reset RLIMIT_NOFILE, to avoid breaking applications that use select() + struct rlimit rl = { + .rlim_cur = execbuf->ctx->nofile_soft, + .rlim_max = execbuf->ctx->nofile_hard, + }; + if (bfs_spawn_addsetrlimit(&ctx, RLIMIT_NOFILE, &rl) != 0) { + goto fail; + } + + if (execbuf->wd_fd >= 0) { + if (bfs_spawn_addfchdir(&ctx, execbuf->wd_fd) != 0) { + goto fail; + } + } + + pid = bfs_spawn(execbuf->argv[0], &ctx, execbuf->argv, NULL); +fail: + error = errno; + bfs_spawn_destroy(&ctx); + if (pid < 0) { + errno = error; + return -1; + } + + int wstatus; + if (waitpid(pid, &wstatus, 0) < 0) { + return -1; + } + + int ret = -1; + + if (WIFEXITED(wstatus)) { + int status = WEXITSTATUS(wstatus); + if (status == EXIT_SUCCESS) { + ret = 0; + } else { + bfs_exec_debug(execbuf, "Command '%s' failed with status %d\n", execbuf->argv[0], status); + } + } else if (WIFSIGNALED(wstatus)) { + int sig = WTERMSIG(wstatus); + const char *str = strsignal(sig); + if (!str) { + str = "unknown"; + } + bfs_warning(execbuf->ctx, "Command '${ex}%s${rs}' terminated by signal %d (%s)\n", execbuf->argv[0], sig, str); + } else { + bfs_warning(execbuf->ctx, "Command '${ex}%s${rs}' terminated abnormally\n", execbuf->argv[0]); + } + + errno = 0; + return ret; +} + +/** exec() a command for a single file. */ +static int bfs_exec_single(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + int ret = -1, error = 0; + + char *path = bfs_exec_format_path(execbuf, ftwbuf); + if (!path) { + goto out; + } + + size_t i; + for (i = 0; i < execbuf->tmpl_argc; ++i) { + execbuf->argv[i] = bfs_exec_format_arg(execbuf->tmpl_argv[i], path); + if (!execbuf->argv[i]) { + goto out_free; + } + } + execbuf->argv[i] = NULL; + execbuf->argc = i; + + if (execbuf->flags & BFS_EXEC_CHDIR) { + if (bfs_exec_openwd(execbuf, ftwbuf) != 0) { + goto out_free; + } + } + + ret = bfs_exec_spawn(execbuf); + +out_free: + error = errno; + + bfs_exec_closewd(execbuf, ftwbuf); + + for (size_t j = 0; j < i; ++j) { + bfs_exec_free_arg(execbuf->argv[j], execbuf->tmpl_argv[j]); + } + + free(path); + + errno = error; + +out: + return ret; +} + +/** Check if any arguments remain in the buffer. */ +static bool bfs_exec_args_remain(const struct bfs_exec *execbuf) { + return execbuf->argc >= execbuf->tmpl_argc; +} + +/** Compute the current ARG_MAX estimate for binary search. */ +static size_t bfs_exec_estimate_max(const struct bfs_exec *execbuf) { + size_t min = execbuf->arg_min; + size_t max = execbuf->arg_max; + return min + (max - min)/2; +} + +/** Update the ARG_MAX lower bound from a successful execution. */ +static void bfs_exec_update_min(struct bfs_exec *execbuf) { + if (execbuf->arg_size > execbuf->arg_min) { + execbuf->arg_min = execbuf->arg_size; + + // Don't let min exceed max + if (execbuf->arg_min > execbuf->arg_max) { + execbuf->arg_min = execbuf->arg_max; + } + + size_t estimate = bfs_exec_estimate_max(execbuf); + bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n", + execbuf->arg_min, execbuf->arg_max, estimate); + } +} + +/** Update the ARG_MAX upper bound from a failed execution. */ +static size_t bfs_exec_update_max(struct bfs_exec *execbuf) { + bfs_exec_debug(execbuf, "Got E2BIG, shrinking argument list...\n"); + + size_t size = execbuf->arg_size; + if (size <= execbuf->arg_min) { + // Lower bound was wrong, restart binary search. + execbuf->arg_min = 0; + } + + // Trim a fraction off the max size to avoid repeated failures near the + // top end of the working range + size -= size/16; + if (size < execbuf->arg_max) { + execbuf->arg_max = size; + + // Don't let min exceed max + if (execbuf->arg_min > execbuf->arg_max) { + execbuf->arg_min = execbuf->arg_max; + } + } + + // Binary search for a more precise bound + size_t estimate = bfs_exec_estimate_max(execbuf); + bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n", + execbuf->arg_min, execbuf->arg_max, estimate); + return estimate; +} + +/** Execute the pending command from a BFS_EXEC_MULTI execbuf. */ +static int bfs_exec_flush(struct bfs_exec *execbuf) { + int ret = 0, error = 0; + + size_t orig_argc = execbuf->argc; + while (bfs_exec_args_remain(execbuf)) { + execbuf->argv[execbuf->argc] = NULL; + ret = bfs_exec_spawn(execbuf); + error = errno; + if (ret == 0) { + bfs_exec_update_min(execbuf); + break; + } else if (error != E2BIG) { + break; + } + + // Try to recover from E2BIG by trying fewer and fewer arguments + // until they fit + size_t new_max = bfs_exec_update_max(execbuf); + while (execbuf->arg_size > new_max) { + execbuf->argv[execbuf->argc] = execbuf->argv[execbuf->argc - 1]; + execbuf->arg_size -= bfs_exec_arg_size(execbuf->argv[execbuf->argc]); + --execbuf->argc; + } + } + + size_t new_argc = execbuf->argc; + for (size_t i = execbuf->tmpl_argc - 1; i < new_argc; ++i) { + free(execbuf->argv[i]); + } + execbuf->argc = execbuf->tmpl_argc - 1; + execbuf->arg_size = 0; + + if (new_argc < orig_argc) { + // If we recovered from E2BIG, there are unused arguments at the + // end of the list + for (size_t i = new_argc + 1; i <= orig_argc; ++i) { + if (error == 0) { + execbuf->argv[execbuf->argc] = execbuf->argv[i]; + execbuf->arg_size += bfs_exec_arg_size(execbuf->argv[execbuf->argc]); + ++execbuf->argc; + } else { + free(execbuf->argv[i]); + } + } + } + + errno = error; + return ret; +} + +/** Check if we need to flush the execbuf because we're changing directories. */ +static bool bfs_exec_changed_dirs(const struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + if (execbuf->flags & BFS_EXEC_CHDIR) { + if (ftwbuf->nameoff > execbuf->wd_len + || (execbuf->wd_path && strncmp(ftwbuf->path, execbuf->wd_path, execbuf->wd_len) != 0)) { + bfs_exec_debug(execbuf, "Changed directories, executing buffered command\n"); + return true; + } + } + + return false; +} + +/** Check if we need to flush the execbuf because we're too big. */ +static bool bfs_exec_would_overflow(const struct bfs_exec *execbuf, const char *arg) { + size_t arg_max = bfs_exec_estimate_max(execbuf); + size_t next_size = execbuf->arg_size + bfs_exec_arg_size(arg); + if (next_size > arg_max) { + bfs_exec_debug(execbuf, "Command size (%zu) would exceed maximum (%zu), executing buffered command\n", + next_size, arg_max); + return true; + } + + return false; +} + +/** Push a new argument to a BFS_EXEC_MULTI execbuf. */ +static int bfs_exec_push(struct bfs_exec *execbuf, char *arg) { + execbuf->argv[execbuf->argc] = arg; + + if (execbuf->argc + 1 >= execbuf->argv_cap) { + size_t cap = 2*execbuf->argv_cap; + char **argv = realloc(execbuf->argv, cap*sizeof(*argv)); + if (!argv) { + return -1; + } + execbuf->argv = argv; + execbuf->argv_cap = cap; + } + + ++execbuf->argc; + execbuf->arg_size += bfs_exec_arg_size(arg); + return 0; +} + +/** Handle a new path for a BFS_EXEC_MULTI execbuf. */ +static int bfs_exec_multi(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + int ret = 0; + + char *arg = bfs_exec_format_path(execbuf, ftwbuf); + if (!arg) { + ret = -1; + goto out; + } + + if (bfs_exec_changed_dirs(execbuf, ftwbuf)) { + while (bfs_exec_args_remain(execbuf)) { + ret |= bfs_exec_flush(execbuf); + } + bfs_exec_closewd(execbuf, ftwbuf); + } else if (bfs_exec_would_overflow(execbuf, arg)) { + ret |= bfs_exec_flush(execbuf); + } + + if ((execbuf->flags & BFS_EXEC_CHDIR) && execbuf->wd_fd < 0) { + if (bfs_exec_openwd(execbuf, ftwbuf) != 0) { + ret = -1; + goto out_arg; + } + } + + if (bfs_exec_push(execbuf, arg) != 0) { + ret = -1; + goto out_arg; + } + + // arg will get cleaned up later by bfs_exec_flush() + goto out; + +out_arg: + free(arg); +out: + return ret; +} + +int bfs_exec(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + if (execbuf->flags & BFS_EXEC_MULTI) { + if (bfs_exec_multi(execbuf, ftwbuf) == 0) { + errno = 0; + } else { + execbuf->ret = -1; + } + // -exec ... + never returns false + return 0; + } else { + return bfs_exec_single(execbuf, ftwbuf); + } +} + +int bfs_exec_finish(struct bfs_exec *execbuf) { + if (execbuf->flags & BFS_EXEC_MULTI) { + bfs_exec_debug(execbuf, "Finishing execution, executing buffered command\n"); + while (bfs_exec_args_remain(execbuf)) { + execbuf->ret |= bfs_exec_flush(execbuf); + } + if (execbuf->ret != 0) { + bfs_exec_debug(execbuf, "One or more executions of '%s' failed\n", execbuf->argv[0]); + } + } + return execbuf->ret; +} + +void bfs_exec_free(struct bfs_exec *execbuf) { + if (execbuf) { + bfs_exec_closewd(execbuf, NULL); + free(execbuf->argv); + free(execbuf); + } +} diff --git a/src/exec.h b/src/exec.h new file mode 100644 index 0000000..a3e3c71 --- /dev/null +++ b/src/exec.h @@ -0,0 +1,121 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2017-2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Implementation of -exec/-execdir/-ok/-okdir. + */ + +#ifndef BFS_EXEC_H +#define BFS_EXEC_H + +#include <stddef.h> + +struct BFTW; +struct bfs_ctx; + +/** + * Flags for the -exec actions. + */ +enum bfs_exec_flags { + /** Prompt the user before executing (-ok, -okdir). */ + BFS_EXEC_CONFIRM = 1 << 0, + /** Run the command in the file's parent directory (-execdir, -okdir). */ + BFS_EXEC_CHDIR = 1 << 1, + /** Pass multiple files at once to the command (-exec ... {} +). */ + BFS_EXEC_MULTI = 1 << 2, +}; + +/** + * Buffer for a command line to be executed. + */ +struct bfs_exec { + /** Flags for this exec buffer. */ + enum bfs_exec_flags flags; + + /** The bfs context. */ + const struct bfs_ctx *ctx; + /** Command line template. */ + char **tmpl_argv; + /** Command line template size. */ + size_t tmpl_argc; + + /** The built command line. */ + char **argv; + /** Number of command line arguments. */ + size_t argc; + /** Capacity of argv. */ + size_t argv_cap; + + /** Current size of all arguments. */ + size_t arg_size; + /** Maximum arg_size before E2BIG. */ + size_t arg_max; + /** Lower bound for arg_max. */ + size_t arg_min; + + /** A file descriptor for the working directory, for BFS_EXEC_CHDIR. */ + int wd_fd; + /** The path to the working directory, for BFS_EXEC_CHDIR. */ + char *wd_path; + /** Length of the working directory path. */ + size_t wd_len; + + /** The ultimate return value for bfs_exec_finish(). */ + int ret; +}; + +/** + * Parse an exec action. + * + * @param argv + * The (bfs) command line argument to parse. + * @param flags + * Any flags for this exec action. + * @param ctx + * The bfs context. + * @return + * The parsed exec action, or NULL on failure. + */ +struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs_exec_flags flags); + +/** + * Execute the command for a file. + * + * @param execbuf + * The parsed exec action. + * @param ftwbuf + * The bftw() data for the current file. + * @return 0 if the command succeeded, -1 if it failed. If the command could + * be executed, -1 is returned, and errno will be non-zero. For + * BFS_EXEC_MULTI, errors will not be reported until bfs_exec_finish(). + */ +int bfs_exec(struct bfs_exec *execbuf, const struct BFTW *ftwbuf); + +/** + * Finish executing any commands. + * + * @param execbuf + * The parsed exec action. + * @return 0 on success, -1 if any errors were encountered. + */ +int bfs_exec_finish(struct bfs_exec *execbuf); + +/** + * Free a parsed exec action. + */ +void bfs_exec_free(struct bfs_exec *execbuf); + +#endif // BFS_EXEC_H diff --git a/src/expr.h b/src/expr.h new file mode 100644 index 0000000..1f1ece6 --- /dev/null +++ b/src/expr.h @@ -0,0 +1,235 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * The expression tree representation. + */ + +#ifndef BFS_EXPR_H +#define BFS_EXPR_H + +#include "color.h" +#include "eval.h" +#include "stat.h" +#include <stdbool.h> +#include <stddef.h> +#include <sys/types.h> +#include <time.h> + +/** + * Integer comparison modes. + */ +enum bfs_int_cmp { + /** Exactly N. */ + BFS_INT_EQUAL, + /** Less than N (-N). */ + BFS_INT_LESS, + /** Greater than N (+N). */ + BFS_INT_GREATER, +}; + +/** + * Permission comparison modes. + */ +enum bfs_mode_cmp { + /** Mode is an exact match (MODE). */ + BFS_MODE_EQUAL, + /** Mode has all these bits (-MODE). */ + BFS_MODE_ALL, + /** Mode has any of these bits (/MODE). */ + BFS_MODE_ANY, +}; + +/** + * Possible time units. + */ +enum bfs_time_unit { + /** Seconds. */ + BFS_SECONDS, + /** Minutes. */ + BFS_MINUTES, + /** Days. */ + BFS_DAYS, +}; + +/** + * Possible file size units. + */ +enum bfs_size_unit { + /** 512-byte blocks. */ + BFS_BLOCKS, + /** Single bytes. */ + BFS_BYTES, + /** Two-byte words. */ + BFS_WORDS, + /** Kibibytes. */ + BFS_KB, + /** Mebibytes. */ + BFS_MB, + /** Gibibytes. */ + BFS_GB, + /** Tebibytes. */ + BFS_TB, + /** Pebibytes. */ + BFS_PB, +}; + +/** + * A command line expression. + */ +struct bfs_expr { + /** The function that evaluates this expression. */ + bfs_eval_fn *eval_fn; + + /** The number of command line arguments for this expression. */ + size_t argc; + /** The command line arguments comprising this expression. */ + char **argv; + + /** The number of files this expression keeps open between evaluations. */ + int persistent_fds; + /** The number of files this expression opens during evaluation. */ + int ephemeral_fds; + + /** Whether this expression has no side effects. */ + bool pure; + /** Whether this expression always evaluates to true. */ + bool always_true; + /** Whether this expression always evaluates to false. */ + bool always_false; + /** Whether this expression doesn't appear on the command line. */ + bool synthetic; + + /** Estimated cost. */ + float cost; + /** Estimated probability of success. */ + float probability; + /** Number of times this predicate was evaluated. */ + size_t evaluations; + /** Number of times this predicate succeeded. */ + size_t successes; + /** Total time spent running this predicate. */ + struct timespec elapsed; + + /** Auxilliary data for the evaluation function. */ + union { + /** Child expressions. */ + struct { + /** The left hand side of the expression. */ + struct bfs_expr *lhs; + /** The right hand side of the expression. */ + struct bfs_expr *rhs; + }; + + /** Integer comparisons. */ + struct { + /** Integer for this comparison. */ + long long num; + /** The comparison mode. */ + enum bfs_int_cmp int_cmp; + + /** Optional extra data. */ + union { + /** -size data. */ + enum bfs_size_unit size_unit; + + /** Timestamp comparison data. */ + struct { + /** The stat field to look at. */ + enum bfs_stat_field stat_field; + /** The reference time. */ + struct timespec reftime; + /** The time unit. */ + enum bfs_time_unit time_unit; + }; + }; + }; + + /** Printing actions. */ + struct { + /** The output stream. */ + CFILE *cfile; + /** Optional -printf format. */ + struct bfs_printf *printf; + }; + + /** -exec data. */ + struct bfs_exec *exec; + + /** -flags data. */ + struct { + /** The comparison mode. */ + enum bfs_mode_cmp flags_cmp; + /** Flags that should be set. */ + unsigned long long set_flags; + /** Flags that should be cleared. */ + unsigned long long clear_flags; + }; + + /** -perm data. */ + struct { + /** The comparison mode. */ + enum bfs_mode_cmp mode_cmp; + /** Mode to use for files. */ + mode_t file_mode; + /** Mode to use for directories (different due to X). */ + mode_t dir_mode; + }; + + /** -regex data. */ + struct bfs_regex *regex; + + /** -samefile data. */ + struct { + /** Device number of the target file. */ + dev_t dev; + /** Inode number of the target file. */ + ino_t ino; + }; + }; +}; + +/** Singleton true expression instance. */ +extern struct bfs_expr bfs_true; +/** Singleton false expression instance. */ +extern struct bfs_expr bfs_false; + +/** + * Create a new expression. + */ +struct bfs_expr *bfs_expr_new(bfs_eval_fn *eval, size_t argc, char **argv); + +/** + * @return Whether the expression has child expressions. + */ +bool bfs_expr_has_children(const struct bfs_expr *expr); + +/** + * @return Whether expr is known to always quit. + */ +bool bfs_expr_never_returns(const struct bfs_expr *expr); + +/** + * @return The result of the integer comparison for this expression. + */ +bool bfs_expr_cmp(const struct bfs_expr *expr, long long n); + +/** + * Free an expression tree. + */ +void bfs_expr_free(struct bfs_expr *expr); + +#endif // BFS_EXPR_H diff --git a/src/fsade.c b/src/fsade.c new file mode 100644 index 0000000..1444cf4 --- /dev/null +++ b/src/fsade.c @@ -0,0 +1,392 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2019-2021 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "fsade.h" +#include "bftw.h" +#include "dir.h" +#include "dstring.h" +#include "util.h" +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <unistd.h> + +#if BFS_CAN_CHECK_ACL +# include <sys/acl.h> +#endif + +#if BFS_CAN_CHECK_CAPABILITIES +# include <sys/capability.h> +#endif + +#if BFS_HAS_SYS_EXTATTR +# include <sys/extattr.h> +#elif BFS_HAS_SYS_XATTR +# include <sys/xattr.h> +#endif + +#if BFS_CAN_CHECK_ACL || BFS_CAN_CHECK_CAPABILITIES || BFS_CAN_CHECK_XATTRS + +/** + * Many of the APIs used here don't have *at() variants, but we can try to + * emulate something similar if /proc/self/fd is available. + */ +static const char *fake_at(const struct BFTW *ftwbuf) { + static bool proc_works = true; + static bool proc_checked = false; + + char *path = NULL; + if (!proc_works || ftwbuf->at_fd == AT_FDCWD) { + goto fail; + } + + path = dstrprintf("/proc/self/fd/%d/", ftwbuf->at_fd); + if (!path) { + goto fail; + } + + if (!proc_checked) { + proc_checked = true; + if (xfaccessat(AT_FDCWD, path, F_OK) != 0) { + proc_works = false; + goto fail; + } + } + + if (dstrcat(&path, ftwbuf->at_path) != 0) { + goto fail; + } + + return path; + +fail: + dstrfree(path); + return ftwbuf->path; +} + +static void free_fake_at(const struct BFTW *ftwbuf, const char *path) { + if (path != ftwbuf->path) { + dstrfree((char *)path); + } +} + +/** + * Check if an error was caused by the absence of support or data for a feature. + */ +static bool is_absence_error(int error) { + // If the OS doesn't support the feature, it's obviously not enabled for + // any files + if (error == ENOTSUP) { + return true; + } + + // On Linux, ACLs and capabilities are implemented in terms of extended + // attributes, which report ENODATA/ENOATTR when missing + +#ifdef ENODATA + if (error == ENODATA) { + return true; + } +#endif + +#if defined(ENOATTR) && ENOATTR != ENODATA + if (error == ENOATTR) { + return true; + } +#endif + + // On at least FreeBSD and macOS, EINVAL is returned when the requested + // ACL type is not supported for that file + if (error == EINVAL) { + return true; + } + +#if __APPLE__ + // On macOS, ENOENT can also signal that a file has no ACLs + if (error == ENOENT) { + return true; + } +#endif + + return false; +} + +#endif // BFS_CAN_CHECK_ACL || BFS_CAN_CHECK_CAPABILITIES || BFS_CAN_CHECK_XATTRS + +#if BFS_CAN_CHECK_ACL + +/** Check if a POSIX.1e ACL is non-trivial. */ +static int bfs_check_posix1e_acl(acl_t acl, bool ignore_required) { + int ret = 0; + + acl_entry_t entry; + for (int status = acl_get_entry(acl, ACL_FIRST_ENTRY, &entry); +#if __APPLE__ + // POSIX.1e specifies a return value of 1 for success, but macOS + // returns 0 instead + status == 0; +#else + status > 0; +#endif + status = acl_get_entry(acl, ACL_NEXT_ENTRY, &entry)) { +#if defined(ACL_USER_OBJ) && defined(ACL_GROUP_OBJ) && defined(ACL_OTHER) + if (ignore_required) { + acl_tag_t tag; + if (acl_get_tag_type(entry, &tag) != 0) { + ret = -1; + continue; + } + if (tag == ACL_USER_OBJ || tag == ACL_GROUP_OBJ || tag == ACL_OTHER) { + continue; + } + } +#endif + + ret = 1; + break; + } + + return ret; +} + +/** Check if an ACL of the given type is non-trivial. */ +static int bfs_check_acl_type(acl_t acl, acl_type_t type) { + if (type == ACL_TYPE_DEFAULT) { + // For directory default ACLs, any entries make them non-trivial + return bfs_check_posix1e_acl(acl, false); + } + +#if __FreeBSD__ + int trivial; + +#if BFS_HAS_FEATURE(memory_sanitizer, false) + // msan seems to be missing an interceptor for acl_is_trivial_np() + trivial = 0; +#endif + + if (acl_is_trivial_np(acl, &trivial) < 0) { + return -1; + } else if (trivial) { + return 0; + } else { + return 1; + } +#else // !__FreeBSD__ + return bfs_check_posix1e_acl(acl, true); +#endif +} + +int bfs_check_acl(const struct BFTW *ftwbuf) { + static const acl_type_t acl_types[] = { +#if __APPLE__ + // macOS gives EINVAL for either of the two standard ACL types, + // supporting only ACL_TYPE_EXTENDED + ACL_TYPE_EXTENDED, +#else + // The two standard POSIX.1e ACL types + ACL_TYPE_ACCESS, + ACL_TYPE_DEFAULT, +#endif + +#ifdef ACL_TYPE_NFS4 + ACL_TYPE_NFS4, +#endif + }; + static const size_t n_acl_types = sizeof(acl_types)/sizeof(acl_types[0]); + + if (ftwbuf->type == BFS_LNK) { + return 0; + } + + const char *path = fake_at(ftwbuf); + + int ret = -1, error = 0; + for (size_t i = 0; i < n_acl_types && ret <= 0; ++i) { + acl_type_t type = acl_types[i]; + + if (type == ACL_TYPE_DEFAULT && ftwbuf->type != BFS_DIR) { + // ACL_TYPE_DEFAULT is supported only for directories, + // otherwise acl_get_file() gives EACCESS + continue; + } + + acl_t acl = acl_get_file(path, type); + if (!acl) { + error = errno; + if (is_absence_error(error)) { + ret = 0; + } + continue; + } + + ret = bfs_check_acl_type(acl, type); + error = errno; + acl_free(acl); + } + + free_fake_at(ftwbuf, path); + errno = error; + return ret; +} + +#else // !BFS_CAN_CHECK_ACL + +int bfs_check_acl(const struct BFTW *ftwbuf) { + errno = ENOTSUP; + return -1; +} + +#endif + +#if BFS_CAN_CHECK_CAPABILITIES + +int bfs_check_capabilities(const struct BFTW *ftwbuf) { + if (ftwbuf->type == BFS_LNK) { + return 0; + } + + int ret = -1, error; + const char *path = fake_at(ftwbuf); + + cap_t caps = cap_get_file(path); + if (!caps) { + error = errno; + if (is_absence_error(error)) { + ret = 0; + } + goto out_path; + } + + // TODO: Any better way to check for a non-empty capability set? + char *text = cap_to_text(caps, NULL); + if (!text) { + error = errno; + goto out_caps; + } + ret = text[0] ? 1 : 0; + + error = errno; + cap_free(text); +out_caps: + cap_free(caps); +out_path: + free_fake_at(ftwbuf, path); + errno = error; + return ret; +} + +#else // !BFS_CAN_CHECK_CAPABILITIES + +int bfs_check_capabilities(const struct BFTW *ftwbuf) { + errno = ENOTSUP; + return -1; +} + +#endif + +#if BFS_CAN_CHECK_XATTRS + +int bfs_check_xattrs(const struct BFTW *ftwbuf) { + const char *path = fake_at(ftwbuf); + ssize_t len; + +#if BFS_HAS_SYS_EXTATTR + ssize_t (*extattr_list)(const char *, int, void*, size_t) = + ftwbuf->type == BFS_LNK ? extattr_list_link : extattr_list_file; + + len = extattr_list(path, EXTATTR_NAMESPACE_SYSTEM, NULL, 0); + if (len <= 0) { + len = extattr_list(path, EXTATTR_NAMESPACE_USER, NULL, 0); + } +#elif __APPLE__ + int options = ftwbuf->type == BFS_LNK ? XATTR_NOFOLLOW : 0; + len = listxattr(path, NULL, 0, options); +#else + if (ftwbuf->type == BFS_LNK) { + len = llistxattr(path, NULL, 0); + } else { + len = listxattr(path, NULL, 0); + } +#endif + + int error = errno; + + free_fake_at(ftwbuf, path); + + if (len > 0) { + return 1; + } else if (len == 0 || is_absence_error(error)) { + return 0; + } else if (error == E2BIG) { + return 1; + } else { + errno = error; + return -1; + } +} + +int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name) { + const char *path = fake_at(ftwbuf); + ssize_t len; + +#if BFS_HAS_SYS_EXTATTR + ssize_t (*extattr_get)(const char *, int, const char *, void*, size_t) = + ftwbuf->type == BFS_LNK ? extattr_get_link : extattr_get_file; + + len = extattr_get(path, EXTATTR_NAMESPACE_SYSTEM, name, NULL, 0); + if (len < 0) { + len = extattr_get(path, EXTATTR_NAMESPACE_USER, name, NULL, 0); + } +#elif __APPLE__ + int options = ftwbuf->type == BFS_LNK ? XATTR_NOFOLLOW : 0; + len = getxattr(path, name, NULL, 0, 0, options); +#else + if (ftwbuf->type == BFS_LNK) { + len = lgetxattr(path, name, NULL, 0); + } else { + len = getxattr(path, name, NULL, 0); + } +#endif + + int error = errno; + + free_fake_at(ftwbuf, path); + + if (len >= 0) { + return 1; + } else if (is_absence_error(error)) { + return 0; + } else if (error == E2BIG) { + return 1; + } else { + errno = error; + return -1; + } +} + +#else // !BFS_CAN_CHECK_XATTRS + +int bfs_check_xattrs(const struct BFTW *ftwbuf) { + errno = ENOTSUP; + return -1; +} + +int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name) { + errno = ENOTSUP; + return -1; +} + +#endif diff --git a/src/fsade.h b/src/fsade.h new file mode 100644 index 0000000..e964112 --- /dev/null +++ b/src/fsade.h @@ -0,0 +1,83 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2019-2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * A facade over (file)system features that are (un)implemented differently + * between platforms. + */ + +#ifndef BFS_FSADE_H +#define BFS_FSADE_H + +#include "util.h" +#include <stdbool.h> + +#define BFS_CAN_CHECK_ACL BFS_HAS_SYS_ACL + +#if !defined(BFS_CAN_CHECK_CAPABILITIES) && BFS_HAS_SYS_CAPABILITY && !__FreeBSD__ +# include <sys/capability.h> +# ifdef CAP_CHOWN +# define BFS_CAN_CHECK_CAPABILITIES true +# endif +#endif + +#define BFS_CAN_CHECK_XATTRS (BFS_HAS_SYS_EXTATTR || BFS_HAS_SYS_XATTR) + +struct BFTW; + +/** + * Check if a file has a non-trivial Access Control List. + * + * @param ftwbuf + * The file to check. + * @return + * 1 if it does, 0 if it doesn't, or -1 if an error occurred. + */ +int bfs_check_acl(const struct BFTW *ftwbuf); + +/** + * Check if a file has a non-trivial capability set. + * + * @param ftwbuf + * The file to check. + * @return + * 1 if it does, 0 if it doesn't, or -1 if an error occurred. + */ +int bfs_check_capabilities(const struct BFTW *ftwbuf); + +/** + * Check if a file has any extended attributes set. + * + * @param ftwbuf + * The file to check. + * @return + * 1 if it does, 0 if it doesn't, or -1 if an error occurred. + */ +int bfs_check_xattrs(const struct BFTW *ftwbuf); + +/** + * Check if a file has an extended attribute with the given name. + * + * @param ftwbuf + * The file to check. + * @param name + * The name of the xattr to check. + * @return + * 1 if it does, 0 if it doesn't, or -1 if an error occurred. + */ +int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name); + +#endif // BFS_FSADE_H diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..9dc96e4 --- /dev/null +++ b/src/main.c @@ -0,0 +1,141 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * - main(): the entry point for bfs(1), a breadth-first version of find(1) + * - main.c (this file) + * + * - bfs_parse_cmdline(): parses the command line into an expression tree + * - ctx.[ch] (struct bfs_ctx, the overall bfs context) + * - expr.h (declares the expression tree nodes) + * - parse.[ch] (the parser itself) + * - opt.[ch] (the optimizer) + * + * - bfs_eval(): runs the expression on every file it sees + * - eval.[ch] (the main evaluation functions) + * - exec.[ch] (implements -exec[dir]/-ok[dir]) + * - printf.[ch] (implements -[f]printf) + * + * - bftw(): used by bfs_eval() to walk the directory tree(s) + * - bftw.[ch] (an extended version of nftw(3)) + * + * - Utilities: + * - bfs.h (constants about bfs itself) + * - bar.[ch] (a terminal status bar) + * - color.[ch] (for pretty terminal colors) + * - darray.[ch] (a dynamic array library) + * - diag.[ch] (formats diagnostic messages) + * - dir.[ch] (a directory API facade) + * - dstring.[ch] (a dynamic string library) + * - fsade.[ch] (a facade over non-standard filesystem features) + * - mtab.[ch] (parses the system's mount table) + * - pwcache.[ch] (a cache for the user/group tables) + * - stat.[ch] (wraps stat(), or statx() on Linux) + * - trie.[ch] (a trie set/map implementation) + * - typo.[ch] (fuzzy matching for typos) + * - util.[ch] (everything else) + * - xregex.[ch] (regular expression support) + * - xspawn.[ch] (spawns processes) + * - xtime.[ch] (date/time handling utilities) + */ + +#include "ctx.h" +#include "eval.h" +#include "parse.h" +#include "util.h" +#include <errno.h> +#include <fcntl.h> +#include <locale.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +/** + * Check if a file descriptor is open. + */ +static bool isopen(int fd) { + return fcntl(fd, F_GETFD) >= 0 || errno != EBADF; +} + +/** + * Open a file and redirect it to a particular descriptor. + */ +static int redirect(int fd, const char *path, int flags) { + int newfd = open(path, flags); + if (newfd < 0 || newfd == fd) { + return newfd; + } + + int ret = dup2(newfd, fd); + close_quietly(newfd); + return ret; +} + +/** + * Make sure the standard streams std{in,out,err} are open. If they are not, + * future open() calls may use those file descriptors, and std{in,out,err} will + * use them unintentionally. + */ +static int open_std_streams(void) { +#ifdef O_PATH + const int inflags = O_PATH, outflags = O_PATH; +#else + // These are intentionally backwards so that bfs >&- still fails with EBADF + const int inflags = O_WRONLY, outflags = O_RDONLY; +#endif + + if (!isopen(STDERR_FILENO) && redirect(STDERR_FILENO, "/dev/null", outflags) < 0) { + return -1; + } + if (!isopen(STDOUT_FILENO) && redirect(STDOUT_FILENO, "/dev/null", outflags) < 0) { + perror("redirect()"); + return -1; + } + if (!isopen(STDIN_FILENO) && redirect(STDIN_FILENO, "/dev/null", inflags) < 0) { + perror("redirect()"); + return -1; + } + + return 0; +} + +/** + * bfs entry point. + */ +int main(int argc, char *argv[]) { + int ret = EXIT_FAILURE; + + // Make sure the standard streams are open + if (open_std_streams() != 0) { + goto done; + } + + // Use the system locale instead of "C" + setlocale(LC_ALL, ""); + + struct bfs_ctx *ctx = bfs_parse_cmdline(argc, argv); + if (ctx) { + ret = bfs_eval(ctx); + } + + if (bfs_ctx_free(ctx) != 0 && ret == EXIT_SUCCESS) { + ret = EXIT_FAILURE; + } + +done: + return ret; +} diff --git a/src/mtab.c b/src/mtab.c new file mode 100644 index 0000000..adc3f58 --- /dev/null +++ b/src/mtab.c @@ -0,0 +1,246 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2017-2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "mtab.h" +#include "darray.h" +#include "stat.h" +#include "trie.h" +#include "util.h" +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#if BFS_HAS_SYS_PARAM +# include <sys/param.h> +#endif + +#if BFS_HAS_MNTENT +# define BFS_MNTENT 1 +#elif BSD +# define BFS_MNTINFO 1 +#elif __SVR4 +# define BFS_MNTTAB 1 +#endif + +#if BFS_MNTENT +# include <mntent.h> +# include <paths.h> +# include <stdio.h> +#elif BFS_MNTINFO +# include <sys/mount.h> +# include <sys/ucred.h> +#elif BFS_MNTTAB +# include <stdio.h> +# include <sys/mnttab.h> +#endif + +/** + * A mount point in the table. + */ +struct bfs_mtab_entry { + /** The path to the mount point. */ + char *path; + /** The filesystem type. */ + char *type; +}; + +struct bfs_mtab { + /** The list of mount points. */ + struct bfs_mtab_entry *entries; + /** The basenames of every mount point. */ + struct trie names; + + /** A map from device ID to fstype (populated lazily). */ + struct trie types; + /** Whether the types map has been populated. */ + bool types_filled; +}; + +/** + * Add an entry to the mount table. + */ +static int bfs_mtab_add(struct bfs_mtab *mtab, const char *path, const char *type) { + struct bfs_mtab_entry entry = { + .path = strdup(path), + .type = strdup(type), + }; + + if (!entry.path || !entry.type) { + goto fail_entry; + } + + if (DARRAY_PUSH(&mtab->entries, &entry) != 0) { + goto fail_entry; + } + + if (!trie_insert_str(&mtab->names, xbasename(path))) { + goto fail; + } + + return 0; + +fail_entry: + free(entry.type); + free(entry.path); +fail: + return -1; +} + +struct bfs_mtab *bfs_mtab_parse(void) { + struct bfs_mtab *mtab = malloc(sizeof(*mtab)); + if (!mtab) { + return NULL; + } + + mtab->entries = NULL; + trie_init(&mtab->names); + trie_init(&mtab->types); + mtab->types_filled = false; + + int error = 0; + +#if BFS_MNTENT + + FILE *file = setmntent(_PATH_MOUNTED, "r"); + if (!file) { + // In case we're in a chroot or something with /proc but no /etc/mtab + error = errno; + file = setmntent("/proc/mounts", "r"); + } + if (!file) { + goto fail; + } + + struct mntent *mnt; + while ((mnt = getmntent(file))) { + if (bfs_mtab_add(mtab, mnt->mnt_dir, mnt->mnt_type) != 0) { + error = errno; + endmntent(file); + goto fail; + } + } + + endmntent(file); + +#elif BFS_MNTINFO + +#if __NetBSD__ + typedef struct statvfs bfs_statfs; +#else + typedef struct statfs bfs_statfs; +#endif + + bfs_statfs *mntbuf; + int size = getmntinfo(&mntbuf, MNT_WAIT); + if (size < 0) { + error = errno; + goto fail; + } + + for (bfs_statfs *mnt = mntbuf; mnt < mntbuf + size; ++mnt) { + if (bfs_mtab_add(mtab, mnt->f_mntonname, mnt->f_fstypename) != 0) { + error = errno; + goto fail; + } + } + +#elif BFS_MNTTAB + + FILE *file = xfopen(MNTTAB, O_RDONLY | O_CLOEXEC); + if (!file) { + error = errno; + goto fail; + } + + struct mnttab mnt; + while (getmntent(file, &mnt) == 0) { + if (bfs_mtab_add(mtab, mnt.mnt_mountp, mnt.mnt_fstype) != 0) { + error = errno; + fclose(file); + goto fail; + } + } + + fclose(file); + +#else + + error = ENOTSUP; + goto fail; + +#endif + + return mtab; + +fail: + bfs_mtab_free(mtab); + errno = error; + return NULL; +} + +static void bfs_mtab_fill_types(struct bfs_mtab *mtab) { + for (size_t i = 0; i < darray_length(mtab->entries); ++i) { + struct bfs_mtab_entry *entry = &mtab->entries[i]; + + struct bfs_stat sb; + if (bfs_stat(AT_FDCWD, entry->path, BFS_STAT_NOFOLLOW | BFS_STAT_NOSYNC, &sb) != 0) { + continue; + } + + struct trie_leaf *leaf = trie_insert_mem(&mtab->types, &sb.dev, sizeof(sb.dev)); + if (leaf) { + leaf->value = entry->type; + } + } + + mtab->types_filled = true; +} + +const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statbuf) { + if (!mtab->types_filled) { + bfs_mtab_fill_types((struct bfs_mtab *)mtab); + } + + const struct trie_leaf *leaf = trie_find_mem(&mtab->types, &statbuf->dev, sizeof(statbuf->dev)); + if (leaf) { + return leaf->value; + } else { + return "unknown"; + } +} + +bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *path) { + const char *name = xbasename(path); + return trie_find_str(&mtab->names, name); +} + +void bfs_mtab_free(struct bfs_mtab *mtab) { + if (mtab) { + trie_destroy(&mtab->types); + trie_destroy(&mtab->names); + + for (size_t i = 0; i < darray_length(mtab->entries); ++i) { + free(mtab->entries[i].type); + free(mtab->entries[i].path); + } + darray_free(mtab->entries); + + free(mtab); + } +} diff --git a/src/mtab.h b/src/mtab.h new file mode 100644 index 0000000..807539d --- /dev/null +++ b/src/mtab.h @@ -0,0 +1,71 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2017-2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * A facade over platform-specific APIs for enumerating mounted filesystems. + */ + +#ifndef BFS_MTAB_H +#define BFS_MTAB_H + +#include <stdbool.h> + +struct bfs_stat; + +/** + * A file system mount table. + */ +struct bfs_mtab; + +/** + * Parse the mount table. + * + * @return + * The parsed mount table, or NULL on error. + */ +struct bfs_mtab *bfs_mtab_parse(void); + +/** + * Determine the file system type that a file is on. + * + * @param mtab + * The current mount table. + * @param statbuf + * The bfs_stat() buffer for the file in question. + * @return + * The type of file system containing this file, "unknown" if not known, + * or NULL on error. + */ +const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statbuf); + +/** + * Check if a file could be a mount point. + * + * @param mtab + * The current mount table. + * @param path + * The path to check. + * @return + * Whether the named file could be a mount point. + */ +bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *path); + +/** + * Free a mount table. + */ +void bfs_mtab_free(struct bfs_mtab *mtab); + +#endif // BFS_MTAB_H diff --git a/src/opt.c b/src/opt.c new file mode 100644 index 0000000..f8c0ba3 --- /dev/null +++ b/src/opt.c @@ -0,0 +1,1088 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2017-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * The expression optimizer. Different optimization levels are supported: + * + * -O1: basic logical simplifications, like folding (-true -and -foo) to -foo. + * + * -O2: dead code elimination and data flow analysis. struct opt_facts is used + * to record data flow facts that are true at various points of evaluation. + * Specifically, struct opt_facts records the facts that must be true before an + * expression is evaluated (state->facts), and those that must be true after the + * expression is evaluated, given that it returns true (state->facts_when_true) + * or false (state->facts_when_true). Additionally, state->facts_when_impure + * records the possible data flow facts before any expressions with side effects + * are evaluated. + * + * -O3: expression re-ordering to reduce expected cost. In an expression like + * (-foo -and -bar), if both -foo and -bar are pure (no side effects), they can + * be re-ordered to (-bar -and -foo). This is profitable if the expected cost + * is lower for the re-ordered expression, for example if -foo is very slow or + * -bar is likely to return false. + * + * -O4/-Ofast: aggressive optimizations that may affect correctness in corner + * cases. The main effect is to use facts_when_impure to determine if any side- + * effects are reachable at all, and skipping the traversal if not. + */ + +#include "opt.h" +#include "color.h" +#include "ctx.h" +#include "diag.h" +#include "eval.h" +#include "expr.h" +#include "pwcache.h" +#include "util.h" +#include <assert.h> +#include <limits.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +static char *fake_and_arg = "-a"; +static char *fake_or_arg = "-o"; +static char *fake_not_arg = "!"; + +/** + * A contrained integer range. + */ +struct range { + /** The (inclusive) minimum value. */ + long long min; + /** The (inclusive) maximum value. */ + long long max; +}; + +/** Compute the minimum of two values. */ +static long long min_value(long long a, long long b) { + if (a < b) { + return a; + } else { + return b; + } +} + +/** Compute the maximum of two values. */ +static long long max_value(long long a, long long b) { + if (a > b) { + return a; + } else { + return b; + } +} + +/** Constrain the minimum of a range. */ +static void constrain_min(struct range *range, long long value) { + range->min = max_value(range->min, value); +} + +/** Contrain the maximum of a range. */ +static void constrain_max(struct range *range, long long value) { + range->max = min_value(range->max, value); +} + +/** Remove a single value from a range. */ +static void range_remove(struct range *range, long long value) { + if (range->min == value) { + if (range->min == LLONG_MAX) { + range->max = LLONG_MIN; + } else { + ++range->min; + } + } + + if (range->max == value) { + if (range->max == LLONG_MIN) { + range->min = LLONG_MAX; + } else { + --range->max; + } + } +} + +/** Compute the union of two ranges. */ +static void range_union(struct range *result, const struct range *lhs, const struct range *rhs) { + result->min = min_value(lhs->min, rhs->min); + result->max = max_value(lhs->max, rhs->max); +} + +/** Check if a range contains no values. */ +static bool range_is_impossible(const struct range *range) { + return range->min > range->max; +} + +/** Set a range to contain no values. */ +static void set_range_impossible(struct range *range) { + range->min = LLONG_MAX; + range->max = LLONG_MIN; +} + +/** + * Types of ranges we track. + */ +enum range_type { + /** Search tree depth. */ + DEPTH_RANGE, + /** Group ID. */ + GID_RANGE, + /** Inode number. */ + INUM_RANGE, + /** Hard link count. */ + LINKS_RANGE, + /** File size. */ + SIZE_RANGE, + /** User ID. */ + UID_RANGE, + /** The number of range_types. */ + RANGE_TYPES, +}; + +/** + * A possibly-known value of a predicate. + */ +enum known_pred { + /** The state is impossible to reach. */ + PRED_IMPOSSIBLE = -2, + /** The value of the predicate is not known. */ + PRED_UNKNOWN = -1, + /** The predicate is known to be false. */ + PRED_FALSE = false, + /** The predicate is known to be true. */ + PRED_TRUE = true, +}; + +/** Make a predicate known. */ +static void constrain_pred(enum known_pred *pred, bool value) { + if (*pred == PRED_UNKNOWN) { + *pred = value; + } else if (*pred == !value) { + *pred = PRED_IMPOSSIBLE; + } +} + +/** Compute the union of two known predicates. */ +static enum known_pred pred_union(enum known_pred lhs, enum known_pred rhs) { + if (lhs == PRED_IMPOSSIBLE) { + return rhs; + } else if (rhs == PRED_IMPOSSIBLE) { + return lhs; + } else if (lhs == rhs) { + return lhs; + } else { + return PRED_UNKNOWN; + } +} + +/** + * Types of predicates we track. + */ +enum pred_type { + /** -readable */ + READABLE_PRED, + /** -writable */ + WRITABLE_PRED, + /** -executable */ + EXECUTABLE_PRED, + /** -acl */ + ACL_PRED, + /** -capable */ + CAPABLE_PRED, + /** -empty */ + EMPTY_PRED, + /** -hidden */ + HIDDEN_PRED, + /** -nogroup */ + NOGROUP_PRED, + /** -nouser */ + NOUSER_PRED, + /** -sparse */ + SPARSE_PRED, + /** -xattr */ + XATTR_PRED, + /** The number of pred_types. */ + PRED_TYPES, +}; + +/** + * Data flow facts about an evaluation point. + */ +struct opt_facts { + /** The value ranges we track. */ + struct range ranges[RANGE_TYPES]; + + /** The predicates we track. */ + enum known_pred preds[PRED_TYPES]; + + /** Bitmask of possible file types. */ + unsigned int types; + /** Bitmask of possible link target types. */ + unsigned int xtypes; +}; + +/** Initialize some data flow facts. */ +static void facts_init(struct opt_facts *facts) { + for (int i = 0; i < RANGE_TYPES; ++i) { + struct range *range = &facts->ranges[i]; + range->min = 0; // All ranges we currently track are non-negative + range->max = LLONG_MAX; + } + + for (int i = 0; i < PRED_TYPES; ++i) { + facts->preds[i] = PRED_UNKNOWN; + } + + facts->types = ~0; + facts->xtypes = ~0; +} + +/** Compute the union of two fact sets. */ +static void facts_union(struct opt_facts *result, const struct opt_facts *lhs, const struct opt_facts *rhs) { + for (int i = 0; i < RANGE_TYPES; ++i) { + range_union(&result->ranges[i], &lhs->ranges[i], &rhs->ranges[i]); + } + + for (int i = 0; i < PRED_TYPES; ++i) { + result->preds[i] = pred_union(lhs->preds[i], rhs->preds[i]); + } + + result->types = lhs->types | rhs->types; + result->xtypes = lhs->xtypes | rhs->xtypes; +} + +/** Determine whether a fact set is impossible. */ +static bool facts_are_impossible(const struct opt_facts *facts) { + for (int i = 0; i < RANGE_TYPES; ++i) { + if (range_is_impossible(&facts->ranges[i])) { + return true; + } + } + + for (int i = 0; i < PRED_TYPES; ++i) { + if (facts->preds[i] == PRED_IMPOSSIBLE) { + return true; + } + } + + if (!facts->types || !facts->xtypes) { + return true; + } + + return false; +} + +/** Set some facts to be impossible. */ +static void set_facts_impossible(struct opt_facts *facts) { + for (int i = 0; i < RANGE_TYPES; ++i) { + set_range_impossible(&facts->ranges[i]); + } + + for (int i = 0; i < PRED_TYPES; ++i) { + facts->preds[i] = PRED_IMPOSSIBLE; + } + + facts->types = 0; + facts->xtypes = 0; +} + +/** + * Optimizer state. + */ +struct opt_state { + /** The context we're optimizing. */ + const struct bfs_ctx *ctx; + + /** Data flow facts before this expression is evaluated. */ + struct opt_facts facts; + /** Data flow facts after this expression returns true. */ + struct opt_facts facts_when_true; + /** Data flow facts after this expression returns false. */ + struct opt_facts facts_when_false; + /** Data flow facts before any side-effecting expressions are evaluated. */ + struct opt_facts *facts_when_impure; +}; + +/** Log an optimization. */ +BFS_FORMATTER(3, 4) +static bool opt_debug(const struct opt_state *state, int level, const char *format, ...) { + assert(state->ctx->optlevel >= level); + + if (bfs_debug(state->ctx, DEBUG_OPT, "${cyn}-O%d${rs}: ", level)) { + va_list args; + va_start(args, format); + cvfprintf(state->ctx->cerr, format, args); + va_end(args); + return true; + } else { + return false; + } +} + +/** Warn about an expression. */ +BFS_FORMATTER(3, 4) +static void opt_warning(const struct opt_state *state, const struct bfs_expr *expr, const char *format, ...) { + if (bfs_expr_warning(state->ctx, expr)) { + va_list args; + va_start(args, format); + bfs_warning(state->ctx, format, args); + va_end(args); + } +} + +/** Extract a child expression, freeing the outer expression. */ +static struct bfs_expr *extract_child_expr(struct bfs_expr *expr, struct bfs_expr **child) { + struct bfs_expr *ret = *child; + *child = NULL; + bfs_expr_free(expr); + return ret; +} + +/** + * Negate an expression. + */ +static struct bfs_expr *negate_expr(struct bfs_expr *rhs, char **argv) { + if (rhs->eval_fn == eval_not) { + return extract_child_expr(rhs, &rhs->rhs); + } + + struct bfs_expr *expr = bfs_expr_new(eval_not, 1, argv); + if (!expr) { + bfs_expr_free(rhs); + return NULL; + } + + if (argv == &fake_not_arg) { + expr->synthetic = true; + } + + expr->lhs = NULL; + expr->rhs = rhs; + return expr; +} + +static struct bfs_expr *optimize_not_expr(const struct opt_state *state, struct bfs_expr *expr); +static struct bfs_expr *optimize_and_expr(const struct opt_state *state, struct bfs_expr *expr); +static struct bfs_expr *optimize_or_expr(const struct opt_state *state, struct bfs_expr *expr); + +/** + * Apply De Morgan's laws. + */ +static struct bfs_expr *de_morgan(const struct opt_state *state, struct bfs_expr *expr, char **argv) { + bool debug = opt_debug(state, 1, "De Morgan's laws: %pe ", expr); + + struct bfs_expr *parent = negate_expr(expr, argv); + if (!parent) { + return NULL; + } + + bool has_parent = true; + if (parent->eval_fn != eval_not) { + expr = parent; + has_parent = false; + } + + assert(expr->eval_fn == eval_and || expr->eval_fn == eval_or); + if (expr->eval_fn == eval_and) { + expr->eval_fn = eval_or; + expr->argv = &fake_or_arg; + } else { + expr->eval_fn = eval_and; + expr->argv = &fake_and_arg; + } + expr->synthetic = true; + + expr->lhs = negate_expr(expr->lhs, argv); + expr->rhs = negate_expr(expr->rhs, argv); + if (!expr->lhs || !expr->rhs) { + bfs_expr_free(parent); + return NULL; + } + + if (debug) { + cfprintf(state->ctx->cerr, "<==> %pe\n", parent); + } + + if (expr->lhs->eval_fn == eval_not) { + expr->lhs = optimize_not_expr(state, expr->lhs); + } + if (expr->rhs->eval_fn == eval_not) { + expr->rhs = optimize_not_expr(state, expr->rhs); + } + if (!expr->lhs || !expr->rhs) { + bfs_expr_free(parent); + return NULL; + } + + if (expr->eval_fn == eval_and) { + expr = optimize_and_expr(state, expr); + } else { + expr = optimize_or_expr(state, expr); + } + if (has_parent) { + parent->rhs = expr; + } else { + parent = expr; + } + if (!expr) { + bfs_expr_free(parent); + return NULL; + } + + if (has_parent) { + parent = optimize_not_expr(state, parent); + } + return parent; +} + +/** Optimize an expression recursively. */ +static struct bfs_expr *optimize_expr_recursive(struct opt_state *state, struct bfs_expr *expr); + +/** + * Optimize a negation. + */ +static struct bfs_expr *optimize_not_expr(const struct opt_state *state, struct bfs_expr *expr) { + assert(expr->eval_fn == eval_not); + + struct bfs_expr *rhs = expr->rhs; + + int optlevel = state->ctx->optlevel; + if (optlevel >= 1) { + if (rhs == &bfs_true) { + opt_debug(state, 1, "constant propagation: %pe <==> %pe\n", expr, &bfs_false); + bfs_expr_free(expr); + return &bfs_false; + } else if (rhs == &bfs_false) { + opt_debug(state, 1, "constant propagation: %pe <==> %pe\n", expr, &bfs_true); + bfs_expr_free(expr); + return &bfs_true; + } else if (rhs->eval_fn == eval_not) { + opt_debug(state, 1, "double negation: %pe <==> %pe\n", expr, rhs->rhs); + return extract_child_expr(expr, &rhs->rhs); + } else if (bfs_expr_never_returns(rhs)) { + opt_debug(state, 1, "reachability: %pe <==> %pe\n", expr, rhs); + return extract_child_expr(expr, &expr->rhs); + } else if ((rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) + && (rhs->lhs->eval_fn == eval_not || rhs->rhs->eval_fn == eval_not)) { + return de_morgan(state, expr, expr->argv); + } + } + + expr->pure = rhs->pure; + expr->always_true = rhs->always_false; + expr->always_false = rhs->always_true; + expr->cost = rhs->cost; + expr->probability = 1.0 - rhs->probability; + + return expr; +} + +/** Optimize a negation recursively. */ +static struct bfs_expr *optimize_not_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + struct opt_state rhs_state = *state; + expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); + if (!expr->rhs) { + goto fail; + } + + state->facts_when_true = rhs_state.facts_when_false; + state->facts_when_false = rhs_state.facts_when_true; + + return optimize_not_expr(state, expr); + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** Optimize a conjunction. */ +static struct bfs_expr *optimize_and_expr(const struct opt_state *state, struct bfs_expr *expr) { + assert(expr->eval_fn == eval_and); + + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + + const struct bfs_ctx *ctx = state->ctx; + int optlevel = ctx->optlevel; + if (optlevel >= 1) { + if (lhs == &bfs_true) { + opt_debug(state, 1, "conjunction elimination: %pe <==> %pe\n", expr, rhs); + return extract_child_expr(expr, &expr->rhs); + } else if (rhs == &bfs_true) { + opt_debug(state, 1, "conjunction elimination: %pe <==> %pe\n", expr, lhs); + return extract_child_expr(expr, &expr->lhs); + } else if (lhs->always_false) { + opt_debug(state, 1, "short-circuit: %pe <==> %pe\n", expr, lhs); + opt_warning(state, expr->rhs, "This expression is unreachable.\n\n"); + return extract_child_expr(expr, &expr->lhs); + } else if (lhs->always_true && rhs == &bfs_false) { + bool debug = opt_debug(state, 1, "strength reduction: %pe <==> ", expr); + struct bfs_expr *ret = extract_child_expr(expr, &expr->lhs); + ret = negate_expr(ret, &fake_not_arg); + if (debug && ret) { + cfprintf(ctx->cerr, "%pe\n", ret); + } + return ret; + } else if (optlevel >= 2 && lhs->pure && rhs == &bfs_false) { + opt_debug(state, 2, "purity: %pe <==> %pe\n", expr, rhs); + opt_warning(state, expr->lhs, "The result of this expression is ignored.\n\n"); + return extract_child_expr(expr, &expr->rhs); + } else if (lhs->eval_fn == eval_not && rhs->eval_fn == eval_not) { + return de_morgan(state, expr, expr->lhs->argv); + } + } + + expr->pure = lhs->pure && rhs->pure; + expr->always_true = lhs->always_true && rhs->always_true; + expr->always_false = lhs->always_false || rhs->always_false; + expr->cost = lhs->cost + lhs->probability*rhs->cost; + expr->probability = lhs->probability*rhs->probability; + + return expr; +} + +/** Optimize a conjunction recursively. */ +static struct bfs_expr *optimize_and_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + struct opt_state lhs_state = *state; + expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); + if (!expr->lhs) { + goto fail; + } + + struct opt_state rhs_state = *state; + rhs_state.facts = lhs_state.facts_when_true; + expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); + if (!expr->rhs) { + goto fail; + } + + state->facts_when_true = rhs_state.facts_when_true; + facts_union(&state->facts_when_false, &lhs_state.facts_when_false, &rhs_state.facts_when_false); + + return optimize_and_expr(state, expr); + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** Optimize a disjunction. */ +static struct bfs_expr *optimize_or_expr(const struct opt_state *state, struct bfs_expr *expr) { + assert(expr->eval_fn == eval_or); + + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + + const struct bfs_ctx *ctx = state->ctx; + int optlevel = ctx->optlevel; + if (optlevel >= 1) { + if (lhs->always_true) { + opt_debug(state, 1, "short-circuit: %pe <==> %pe\n", expr, lhs); + opt_warning(state, expr->rhs, "This expression is unreachable.\n\n"); + return extract_child_expr(expr, &expr->lhs); + } else if (lhs == &bfs_false) { + opt_debug(state, 1, "disjunctive syllogism: %pe <==> %pe\n", expr, rhs); + return extract_child_expr(expr, &expr->rhs); + } else if (rhs == &bfs_false) { + opt_debug(state, 1, "disjunctive syllogism: %pe <==> %pe\n", expr, lhs); + return extract_child_expr(expr, &expr->lhs); + } else if (lhs->always_false && rhs == &bfs_true) { + bool debug = opt_debug(state, 1, "strength reduction: %pe <==> ", expr); + struct bfs_expr *ret = extract_child_expr(expr, &expr->lhs); + ret = negate_expr(ret, &fake_not_arg); + if (debug && ret) { + cfprintf(ctx->cerr, "%pe\n", ret); + } + return ret; + } else if (optlevel >= 2 && lhs->pure && rhs == &bfs_true) { + opt_debug(state, 2, "purity: %pe <==> %pe\n", expr, rhs); + opt_warning(state, expr->lhs, "The result of this expression is ignored.\n\n"); + return extract_child_expr(expr, &expr->rhs); + } else if (lhs->eval_fn == eval_not && rhs->eval_fn == eval_not) { + return de_morgan(state, expr, expr->lhs->argv); + } + } + + expr->pure = lhs->pure && rhs->pure; + expr->always_true = lhs->always_true || rhs->always_true; + expr->always_false = lhs->always_false && rhs->always_false; + expr->cost = lhs->cost + (1 - lhs->probability)*rhs->cost; + expr->probability = lhs->probability + rhs->probability - lhs->probability*rhs->probability; + + return expr; +} + +/** Optimize a disjunction recursively. */ +static struct bfs_expr *optimize_or_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + struct opt_state lhs_state = *state; + expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); + if (!expr->lhs) { + goto fail; + } + + struct opt_state rhs_state = *state; + rhs_state.facts = lhs_state.facts_when_false; + expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); + if (!expr->rhs) { + goto fail; + } + + facts_union(&state->facts_when_true, &lhs_state.facts_when_true, &rhs_state.facts_when_true); + state->facts_when_false = rhs_state.facts_when_false; + + return optimize_or_expr(state, expr); + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** Optimize an expression in an ignored-result context. */ +static struct bfs_expr *ignore_result(const struct opt_state *state, struct bfs_expr *expr) { + int optlevel = state->ctx->optlevel; + + if (optlevel >= 1) { + while (true) { + if (expr->eval_fn == eval_not) { + opt_debug(state, 1, "ignored result: %pe --> %pe\n", expr, expr->rhs); + opt_warning(state, expr, "The result of this expression is ignored.\n\n"); + expr = extract_child_expr(expr, &expr->rhs); + } else if (optlevel >= 2 + && (expr->eval_fn == eval_and || expr->eval_fn == eval_or || expr->eval_fn == eval_comma) + && expr->rhs->pure) { + opt_debug(state, 2, "ignored result: %pe --> %pe\n", expr, expr->lhs); + opt_warning(state, expr->rhs, "The result of this expression is ignored.\n\n"); + expr = extract_child_expr(expr, &expr->lhs); + } else { + break; + } + } + + if (optlevel >= 2 && expr->pure && expr != &bfs_false) { + opt_debug(state, 2, "ignored result: %pe --> %pe\n", expr, &bfs_false); + opt_warning(state, expr, "The result of this expression is ignored.\n\n"); + bfs_expr_free(expr); + expr = &bfs_false; + } + } + + return expr; +} + +/** Optimize a comma expression. */ +static struct bfs_expr *optimize_comma_expr(const struct opt_state *state, struct bfs_expr *expr) { + assert(expr->eval_fn == eval_comma); + + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + + int optlevel = state->ctx->optlevel; + if (optlevel >= 1) { + lhs = expr->lhs = ignore_result(state, lhs); + + if (bfs_expr_never_returns(lhs)) { + opt_debug(state, 1, "reachability: %pe <==> %pe\n", expr, lhs); + opt_warning(state, expr->rhs, "This expression is unreachable.\n\n"); + return extract_child_expr(expr, &expr->lhs); + } else if ((lhs->always_true && rhs == &bfs_true) + || (lhs->always_false && rhs == &bfs_false)) { + opt_debug(state, 1, "redundancy elimination: %pe <==> %pe\n", expr, lhs); + return extract_child_expr(expr, &expr->lhs); + } else if (optlevel >= 2 && lhs->pure) { + opt_debug(state, 2, "purity: %pe <==> %pe\n", expr, rhs); + opt_warning(state, expr->lhs, "The result of this expression is ignored.\n\n"); + return extract_child_expr(expr, &expr->rhs); + } + } + + expr->pure = lhs->pure && rhs->pure; + expr->always_true = bfs_expr_never_returns(lhs) || rhs->always_true; + expr->always_false = bfs_expr_never_returns(lhs) || rhs->always_false; + expr->cost = lhs->cost + rhs->cost; + expr->probability = rhs->probability; + + return expr; +} + +/** Optimize a comma expression recursively. */ +static struct bfs_expr *optimize_comma_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + struct opt_state lhs_state = *state; + expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); + if (!expr->lhs) { + goto fail; + } + + struct opt_state rhs_state = *state; + facts_union(&rhs_state.facts, &lhs_state.facts_when_true, &lhs_state.facts_when_false); + expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); + if (!expr->rhs) { + goto fail; + } + + return optimize_comma_expr(state, expr); + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** Infer data flow facts about a predicate. */ +static void infer_pred_facts(struct opt_state *state, enum pred_type pred) { + constrain_pred(&state->facts_when_true.preds[pred], true); + constrain_pred(&state->facts_when_false.preds[pred], false); +} + +/** Infer data flow facts about an -{execut,read,writ}able expression. */ +static void infer_access_facts(struct opt_state *state, const struct bfs_expr *expr) { + if (expr->num & R_OK) { + infer_pred_facts(state, READABLE_PRED); + } + if (expr->num & W_OK) { + infer_pred_facts(state, WRITABLE_PRED); + } + if (expr->num & X_OK) { + infer_pred_facts(state, EXECUTABLE_PRED); + } +} + +/** Infer data flow facts about an icmp-style ([+-]N) expression. */ +static void infer_icmp_facts(struct opt_state *state, const struct bfs_expr *expr, enum range_type type) { + struct range *range_when_true = &state->facts_when_true.ranges[type]; + struct range *range_when_false = &state->facts_when_false.ranges[type]; + long long value = expr->num; + + switch (expr->int_cmp) { + case BFS_INT_EQUAL: + constrain_min(range_when_true, value); + constrain_max(range_when_true, value); + range_remove(range_when_false, value); + break; + + case BFS_INT_LESS: + constrain_min(range_when_false, value); + constrain_max(range_when_true, value); + range_remove(range_when_true, value); + break; + + case BFS_INT_GREATER: + constrain_max(range_when_false, value); + constrain_min(range_when_true, value); + range_remove(range_when_true, value); + break; + } +} + +/** Infer data flow facts about a -gid expression. */ +static void infer_gid_facts(struct opt_state *state, const struct bfs_expr *expr) { + infer_icmp_facts(state, expr, GID_RANGE); + + const struct bfs_groups *groups = bfs_ctx_groups(state->ctx); + struct range *range = &state->facts_when_true.ranges[GID_RANGE]; + if (groups && range->min == range->max) { + gid_t gid = range->min; + bool nogroup = !bfs_getgrgid(groups, gid); + constrain_pred(&state->facts_when_true.preds[NOGROUP_PRED], nogroup); + } +} + +/** Infer data flow facts about a -uid expression. */ +static void infer_uid_facts(struct opt_state *state, const struct bfs_expr *expr) { + infer_icmp_facts(state, expr, UID_RANGE); + + const struct bfs_users *users = bfs_ctx_users(state->ctx); + struct range *range = &state->facts_when_true.ranges[UID_RANGE]; + if (users && range->min == range->max) { + uid_t uid = range->min; + bool nouser = !bfs_getpwuid(users, uid); + constrain_pred(&state->facts_when_true.preds[NOUSER_PRED], nouser); + } +} + +/** Infer data flow facts about a -samefile expression. */ +static void infer_samefile_facts(struct opt_state *state, const struct bfs_expr *expr) { + struct range *range_when_true = &state->facts_when_true.ranges[INUM_RANGE]; + constrain_min(range_when_true, expr->ino); + constrain_max(range_when_true, expr->ino); +} + +/** Infer data flow facts about a -type expression. */ +static void infer_type_facts(struct opt_state *state, const struct bfs_expr *expr) { + state->facts_when_true.types &= expr->num; + state->facts_when_false.types &= ~expr->num; +} + +/** Infer data flow facts about an -xtype expression. */ +static void infer_xtype_facts(struct opt_state *state, const struct bfs_expr *expr) { + state->facts_when_true.xtypes &= expr->num; + state->facts_when_false.xtypes &= ~expr->num; +} + +static struct bfs_expr *optimize_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { + int optlevel = state->ctx->optlevel; + + state->facts_when_true = state->facts; + state->facts_when_false = state->facts; + + if (optlevel >= 2 && facts_are_impossible(&state->facts)) { + opt_debug(state, 2, "reachability: %pe --> %pe\n", expr, &bfs_false); + opt_warning(state, expr, "This expression is unreachable.\n\n"); + bfs_expr_free(expr); + expr = &bfs_false; + goto done; + } + + if (!bfs_expr_has_children(expr) && !expr->pure) { + facts_union(state->facts_when_impure, state->facts_when_impure, &state->facts); + } + + if (expr->eval_fn == eval_access) { + infer_access_facts(state, expr); + } else if (expr->eval_fn == eval_acl) { + infer_pred_facts(state, ACL_PRED); + } else if (expr->eval_fn == eval_capable) { + infer_pred_facts(state, CAPABLE_PRED); + } else if (expr->eval_fn == eval_depth) { + infer_icmp_facts(state, expr, DEPTH_RANGE); + } else if (expr->eval_fn == eval_empty) { + infer_pred_facts(state, EMPTY_PRED); + } else if (expr->eval_fn == eval_gid) { + infer_gid_facts(state, expr); + } else if (expr->eval_fn == eval_hidden) { + infer_pred_facts(state, HIDDEN_PRED); + } else if (expr->eval_fn == eval_inum) { + infer_icmp_facts(state, expr, INUM_RANGE); + } else if (expr->eval_fn == eval_links) { + infer_icmp_facts(state, expr, LINKS_RANGE); + } else if (expr->eval_fn == eval_nogroup) { + infer_pred_facts(state, NOGROUP_PRED); + } else if (expr->eval_fn == eval_nouser) { + infer_pred_facts(state, NOUSER_PRED); + } else if (expr->eval_fn == eval_samefile) { + infer_samefile_facts(state, expr); + } else if (expr->eval_fn == eval_size) { + infer_icmp_facts(state, expr, SIZE_RANGE); + } else if (expr->eval_fn == eval_sparse) { + infer_pred_facts(state, SPARSE_PRED); + } else if (expr->eval_fn == eval_type) { + infer_type_facts(state, expr); + } else if (expr->eval_fn == eval_uid) { + infer_uid_facts(state, expr); + } else if (expr->eval_fn == eval_xattr) { + infer_pred_facts(state, XATTR_PRED); + } else if (expr->eval_fn == eval_xtype) { + infer_xtype_facts(state, expr); + } else if (expr->eval_fn == eval_not) { + expr = optimize_not_expr_recursive(state, expr); + } else if (expr->eval_fn == eval_and) { + expr = optimize_and_expr_recursive(state, expr); + } else if (expr->eval_fn == eval_or) { + expr = optimize_or_expr_recursive(state, expr); + } else if (expr->eval_fn == eval_comma) { + expr = optimize_comma_expr_recursive(state, expr); + } + + if (!expr) { + goto done; + } + + if (bfs_expr_has_children(expr)) { + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + if (rhs) { + expr->persistent_fds = rhs->persistent_fds; + expr->ephemeral_fds = rhs->ephemeral_fds; + } + if (lhs) { + expr->persistent_fds += lhs->persistent_fds; + if (lhs->ephemeral_fds > expr->ephemeral_fds) { + expr->ephemeral_fds = lhs->ephemeral_fds; + } + } + } + + if (expr->always_true) { + set_facts_impossible(&state->facts_when_false); + } + if (expr->always_false) { + set_facts_impossible(&state->facts_when_true); + } + + if (optlevel < 2 || expr == &bfs_true || expr == &bfs_false) { + goto done; + } + + if (facts_are_impossible(&state->facts_when_true)) { + if (expr->pure) { + opt_debug(state, 2, "data flow: %pe --> %pe\n", expr, &bfs_false); + opt_warning(state, expr, "This expression is always false.\n\n"); + bfs_expr_free(expr); + expr = &bfs_false; + } else { + expr->always_false = true; + expr->probability = 0.0; + } + } else if (facts_are_impossible(&state->facts_when_false)) { + if (expr->pure) { + opt_debug(state, 2, "data flow: %pe --> %pe\n", expr, &bfs_true); + opt_warning(state, expr, "This expression is always true.\n\n"); + bfs_expr_free(expr); + expr = &bfs_true; + } else { + expr->always_true = true; + expr->probability = 1.0; + } + } + +done: + return expr; +} + +/** Swap the children of a binary expression if it would reduce the cost. */ +static bool reorder_expr(const struct opt_state *state, struct bfs_expr *expr, float swapped_cost) { + if (swapped_cost < expr->cost) { + bool debug = opt_debug(state, 3, "cost: %pe <==> ", expr); + struct bfs_expr *lhs = expr->lhs; + expr->lhs = expr->rhs; + expr->rhs = lhs; + if (debug) { + cfprintf(state->ctx->cerr, "%pe (~${ylw}%g${rs} --> ~${ylw}%g${rs})\n", expr, expr->cost, swapped_cost); + } + expr->cost = swapped_cost; + return true; + } else { + return false; + } +} + +/** + * Recursively reorder sub-expressions to reduce the overall cost. + * + * @param expr + * The expression to optimize. + * @return + * Whether any subexpression was reordered. + */ +static bool reorder_expr_recursive(const struct opt_state *state, struct bfs_expr *expr) { + if (!bfs_expr_has_children(expr)) { + return false; + } + + struct bfs_expr *lhs = expr->lhs; + struct bfs_expr *rhs = expr->rhs; + + bool ret = false; + if (lhs) { + ret |= reorder_expr_recursive(state, lhs); + } + if (rhs) { + ret |= reorder_expr_recursive(state, rhs); + } + + if (expr->eval_fn == eval_and || expr->eval_fn == eval_or) { + if (lhs->pure && rhs->pure) { + float rhs_prob = expr->eval_fn == eval_and ? rhs->probability : 1.0 - rhs->probability; + float swapped_cost = rhs->cost + rhs_prob*lhs->cost; + ret |= reorder_expr(state, expr, swapped_cost); + } + } + + return ret; +} + +/** + * Optimize a top-level expression. + */ +static struct bfs_expr *optimize_expr(struct opt_state *state, struct bfs_expr *expr) { + struct opt_facts saved_impure = *state->facts_when_impure; + + expr = optimize_expr_recursive(state, expr); + if (!expr) { + return NULL; + } + + if (state->ctx->optlevel >= 3 && reorder_expr_recursive(state, expr)) { + // Re-do optimizations to account for the new ordering + *state->facts_when_impure = saved_impure; + expr = optimize_expr_recursive(state, expr); + if (!expr) { + return NULL; + } + } + + return expr; +} + +int bfs_optimize(struct bfs_ctx *ctx) { + bfs_ctx_dump(ctx, DEBUG_OPT); + + struct opt_facts facts_when_impure; + set_facts_impossible(&facts_when_impure); + + struct opt_state state = { + .ctx = ctx, + .facts_when_impure = &facts_when_impure, + }; + facts_init(&state.facts); + + ctx->exclude = optimize_expr(&state, ctx->exclude); + if (!ctx->exclude) { + return -1; + } + + // Only non-excluded files are evaluated + state.facts = state.facts_when_false; + + struct range *depth = &state.facts.ranges[DEPTH_RANGE]; + constrain_min(depth, ctx->mindepth); + constrain_max(depth, ctx->maxdepth); + + ctx->expr = optimize_expr(&state, ctx->expr); + if (!ctx->expr) { + return -1; + } + + ctx->expr = ignore_result(&state, ctx->expr); + + if (facts_are_impossible(&facts_when_impure)) { + bfs_warning(ctx, "This command won't do anything.\n\n"); + } + + const struct range *depth_when_impure = &facts_when_impure.ranges[DEPTH_RANGE]; + long long mindepth = depth_when_impure->min; + long long maxdepth = depth_when_impure->max; + + int optlevel = ctx->optlevel; + + if (optlevel >= 2 && mindepth > ctx->mindepth) { + if (mindepth > INT_MAX) { + mindepth = INT_MAX; + } + ctx->mindepth = mindepth; + opt_debug(&state, 2, "data flow: mindepth --> %d\n", ctx->mindepth); + } + + if (optlevel >= 4 && maxdepth < ctx->maxdepth) { + if (maxdepth < INT_MIN) { + maxdepth = INT_MIN; + } + ctx->maxdepth = maxdepth; + opt_debug(&state, 4, "data flow: maxdepth --> %d\n", ctx->maxdepth); + } + + return 0; +} diff --git a/src/opt.h b/src/opt.h new file mode 100644 index 0000000..5f8180d --- /dev/null +++ b/src/opt.h @@ -0,0 +1,37 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Optimization. + */ + +#ifndef BFS_OPT_H +#define BFS_OPT_H + +struct bfs_ctx; + +/** + * Apply optimizations to the command line. + * + * @param ctx + * The bfs context to optimize. + * @return + * 0 if successful, -1 on error. + */ +int bfs_optimize(struct bfs_ctx *ctx); + +#endif // BFS_OPT_H + diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..65087a0 --- /dev/null +++ b/src/parse.c @@ -0,0 +1,3959 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * The command line parser. Expressions are parsed by recursive descent, with a + * grammar described in the comments of the parse_*() functions. The parser + * also accepts flags and paths at any point in the expression, by treating + * flags like always-true options, and skipping over paths wherever they appear. + */ + +#include "parse.h" +#include "bfs.h" +#include "bftw.h" +#include "color.h" +#include "ctx.h" +#include "darray.h" +#include "diag.h" +#include "dir.h" +#include "eval.h" +#include "exec.h" +#include "expr.h" +#include "fsade.h" +#include "opt.h" +#include "printf.h" +#include "pwcache.h" +#include "stat.h" +#include "typo.h" +#include "util.h" +#include "xregex.h" +#include "xspawn.h" +#include "xtime.h" +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <grp.h> +#include <limits.h> +#include <pwd.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> + +// Strings printed by -D tree for "fake" expressions +static char *fake_and_arg = "-a"; +static char *fake_false_arg = "-false"; +static char *fake_hidden_arg = "-hidden"; +static char *fake_or_arg = "-o"; +static char *fake_print_arg = "-print"; +static char *fake_true_arg = "-true"; + +// Cost estimation constants +#define FAST_COST 40.0 +#define STAT_COST 1000.0 +#define PRINT_COST 20000.0 + +struct bfs_expr bfs_true = { + .eval_fn = eval_true, + .argc = 1, + .argv = &fake_true_arg, + .pure = true, + .always_true = true, + .synthetic = true, + .cost = FAST_COST, + .probability = 1.0, +}; + +struct bfs_expr bfs_false = { + .eval_fn = eval_false, + .argc = 1, + .argv = &fake_false_arg, + .pure = true, + .always_false = true, + .synthetic = true, + .cost = FAST_COST, + .probability = 0.0, +}; + +void bfs_expr_free(struct bfs_expr *expr) { + if (!expr || expr == &bfs_true || expr == &bfs_false) { + return; + } + + if (bfs_expr_has_children(expr)) { + bfs_expr_free(expr->rhs); + bfs_expr_free(expr->lhs); + } else if (expr->eval_fn == eval_exec) { + bfs_exec_free(expr->exec); + } else if (expr->eval_fn == eval_fprintf) { + bfs_printf_free(expr->printf); + } else if (expr->eval_fn == eval_regex) { + bfs_regfree(expr->regex); + } + + free(expr); +} + +struct bfs_expr *bfs_expr_new(bfs_eval_fn *eval_fn, size_t argc, char **argv) { + struct bfs_expr *expr = malloc(sizeof(*expr)); + if (!expr) { + perror("malloc()"); + return NULL; + } + + expr->eval_fn = eval_fn; + expr->argc = argc; + expr->argv = argv; + expr->persistent_fds = 0; + expr->ephemeral_fds = 0; + expr->pure = false; + expr->always_true = false; + expr->always_false = false; + expr->synthetic = false; + expr->cost = FAST_COST; + expr->probability = 0.5; + expr->evaluations = 0; + expr->successes = 0; + expr->elapsed.tv_sec = 0; + expr->elapsed.tv_nsec = 0; + return expr; +} + +/** + * Create a new unary expression. + */ +static struct bfs_expr *new_unary_expr(bfs_eval_fn *eval_fn, struct bfs_expr *rhs, char **argv) { + struct bfs_expr *expr = bfs_expr_new(eval_fn, 1, argv); + if (!expr) { + bfs_expr_free(rhs); + return NULL; + } + + expr->lhs = NULL; + expr->rhs = rhs; + assert(bfs_expr_has_children(expr)); + + expr->persistent_fds = rhs->persistent_fds; + expr->ephemeral_fds = rhs->ephemeral_fds; + return expr; +} + +/** + * Create a new binary expression. + */ +static struct bfs_expr *new_binary_expr(bfs_eval_fn *eval_fn, struct bfs_expr *lhs, struct bfs_expr *rhs, char **argv) { + struct bfs_expr *expr = bfs_expr_new(eval_fn, 1, argv); + if (!expr) { + bfs_expr_free(rhs); + bfs_expr_free(lhs); + return NULL; + } + + expr->lhs = lhs; + expr->rhs = rhs; + assert(bfs_expr_has_children(expr)); + + if (argv == &fake_and_arg || argv == &fake_or_arg) { + expr->synthetic = true; + } + + expr->persistent_fds = lhs->persistent_fds + rhs->persistent_fds; + if (lhs->ephemeral_fds > rhs->ephemeral_fds) { + expr->ephemeral_fds = lhs->ephemeral_fds; + } else { + expr->ephemeral_fds = rhs->ephemeral_fds; + } + + return expr; +} + +bool bfs_expr_has_children(const struct bfs_expr *expr) { + return expr->eval_fn == eval_and + || expr->eval_fn == eval_or + || expr->eval_fn == eval_not + || expr->eval_fn == eval_comma; +} + +bool bfs_expr_never_returns(const struct bfs_expr *expr) { + // Expressions that never return are vacuously both always true and always false + return expr->always_true && expr->always_false; +} + +/** + * Set an expression to always return true. + */ +static void expr_set_always_true(struct bfs_expr *expr) { + expr->always_true = true; + expr->probability = 1.0; +} + +/** + * Set an expression to never return. + */ +static void expr_set_never_returns(struct bfs_expr *expr) { + expr->always_true = expr->always_false = true; +} + +/** + * Color use flags. + */ +enum use_color { + COLOR_NEVER, + COLOR_AUTO, + COLOR_ALWAYS, +}; + +/** + * Ephemeral state for parsing the command line. + */ +struct parser_state { + /** The command line being constructed. */ + struct bfs_ctx *ctx; + /** The command line arguments being parsed. */ + char **argv; + /** The name of this program. */ + const char *command; + + /** The current regex flags to use. */ + enum bfs_regex_type regex_type; + + /** Whether stdout is a terminal. */ + bool stdout_tty; + /** Whether this session is interactive (stdin and stderr are each a terminal). */ + bool interactive; + /** Whether -color or -nocolor has been passed. */ + enum use_color use_color; + /** Whether a -print action is implied. */ + bool implicit_print; + /** Whether the default root "." should be used. */ + bool implicit_root; + /** Whether the expression has started. */ + bool expr_started; + /** Whether an information option like -help or -version was passed. */ + bool just_info; + /** Whether we are currently parsing an -exclude expression. */ + bool excluding; + + /** The last non-path argument. */ + char **last_arg; + /** A "-depth"-type argument, if any. */ + char **depth_arg; + /** A "-prune" argument, if any. */ + char **prune_arg; + /** A "-mount" argument, if any. */ + char **mount_arg; + /** An "-xdev" argument, if any. */ + char **xdev_arg; + /** A "-files0-from" argument, if any. */ + char **files0_arg; + /** A "-files0-from -" argument, if any. */ + char **files0_stdin_arg; + /** An "-ok"-type expression, if any. */ + const struct bfs_expr *ok_expr; + + /** The current time. */ + struct timespec now; +}; + +/** + * Possible token types. + */ +enum token_type { + /** A flag. */ + T_FLAG, + /** A root path. */ + T_PATH, + /** An option. */ + T_OPTION, + /** A test. */ + T_TEST, + /** An action. */ + T_ACTION, + /** An operator. */ + T_OPERATOR, +}; + +/** + * Print a low-level error message during parsing. + */ +static void parse_perror(const struct parser_state *state, const char *str) { + bfs_perror(state->ctx, str); +} + +/** Initialize an empty highlighted range. */ +static void init_highlight(const struct bfs_ctx *ctx, bool *args) { + for (size_t i = 0; i < ctx->argc; ++i) { + args[i] = false; + } +} + +/** Highlight a range of command line arguments. */ +static void highlight_args(const struct bfs_ctx *ctx, char **argv, size_t argc, bool *args) { + size_t i = argv - ctx->argv; + for (size_t j = 0; j < argc; ++j) { + assert(i + j < ctx->argc); + args[i + j] = true; + } +} + +/** + * Print an error message during parsing. + */ +BFS_FORMATTER(2, 3) +static void parse_error(const struct parser_state *state, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = state->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, state->argv, 1, highlight); + bfs_argv_error(ctx, highlight); + + va_list args; + va_start(args, format); + errno = error; + bfs_verror(state->ctx, format, args); + va_end(args); +} + +/** + * Print an error about some command line arguments. + */ +BFS_FORMATTER(4, 5) +static void parse_argv_error(const struct parser_state *state, char **argv, size_t argc, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = state->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, argv, argc, highlight); + bfs_argv_error(ctx, highlight); + + va_list args; + va_start(args, format); + errno = error; + bfs_verror(ctx, format, args); + va_end(args); +} + +/** + * Print an error about conflicting command line arguments. + */ +BFS_FORMATTER(6, 7) +static void parse_conflict_error(const struct parser_state *state, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = state->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, argv1, argc1, highlight); + highlight_args(ctx, argv2, argc2, highlight); + bfs_argv_error(ctx, highlight); + + va_list args; + va_start(args, format); + errno = error; + bfs_verror(ctx, format, args); + va_end(args); +} + +/** + * Print an error about an expression. + */ +BFS_FORMATTER(3, 4) +static void parse_expr_error(const struct parser_state *state, const struct bfs_expr *expr, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = state->ctx; + + bfs_expr_error(ctx, expr); + + va_list args; + va_start(args, format); + errno = error; + bfs_verror(ctx, format, args); + va_end(args); +} + +/** + * Print a warning message during parsing. + */ +BFS_FORMATTER(2, 3) +static bool parse_warning(const struct parser_state *state, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = state->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, state->argv, 1, highlight); + if (!bfs_argv_warning(ctx, highlight)) { + return false; + } + + va_list args; + va_start(args, format); + errno = error; + bool ret = bfs_vwarning(state->ctx, format, args); + va_end(args); + return ret; +} + +/** + * Print a warning about conflicting command line arguments. + */ +BFS_FORMATTER(6, 7) +static bool parse_conflict_warning(const struct parser_state *state, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = state->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, argv1, argc1, highlight); + highlight_args(ctx, argv2, argc2, highlight); + if (!bfs_argv_warning(ctx, highlight)) { + return false; + } + + va_list args; + va_start(args, format); + errno = error; + bool ret = bfs_vwarning(ctx, format, args); + va_end(args); + return ret; +} + +/** + * Print a warning about an expression. + */ +BFS_FORMATTER(3, 4) +static bool parse_expr_warning(const struct parser_state *state, const struct bfs_expr *expr, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = state->ctx; + + if (!bfs_expr_warning(ctx, expr)) { + return false; + } + + va_list args; + va_start(args, format); + errno = error; + bool ret = bfs_vwarning(ctx, format, args); + va_end(args); + return ret; +} + +/** + * Fill in a "-print"-type expression. + */ +static void init_print_expr(struct parser_state *state, struct bfs_expr *expr) { + expr_set_always_true(expr); + expr->cost = PRINT_COST; + expr->cfile = state->ctx->cout; +} + +/** + * Open a file for an expression. + */ +static int expr_open(struct parser_state *state, struct bfs_expr *expr, const char *path) { + struct bfs_ctx *ctx = state->ctx; + + FILE *file = NULL; + CFILE *cfile = NULL; + + file = xfopen(path, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC); + if (!file) { + goto fail; + } + + cfile = cfwrap(file, state->use_color ? ctx->colors : NULL, true); + if (!cfile) { + goto fail; + } + + CFILE *dedup = bfs_ctx_dedup(ctx, cfile, path); + if (!dedup) { + goto fail; + } + + if (dedup != cfile) { + cfclose(cfile); + } + + expr->cfile = dedup; + return 0; + +fail: + parse_expr_error(state, expr, "%m.\n"); + if (cfile) { + cfclose(cfile); + } else if (file) { + fclose(file); + } + return -1; +} + +/** + * Invoke bfs_stat() on an argument. + */ +static int stat_arg(const struct parser_state *state, char **arg, struct bfs_stat *sb) { + const struct bfs_ctx *ctx = state->ctx; + + bool follow = ctx->flags & (BFTW_FOLLOW_ROOTS | BFTW_FOLLOW_ALL); + enum bfs_stat_flags flags = follow ? BFS_STAT_TRYFOLLOW : BFS_STAT_NOFOLLOW; + + int ret = bfs_stat(AT_FDCWD, *arg, flags, sb); + if (ret != 0) { + parse_argv_error(state, arg, 1, "%m.\n"); + } + return ret; +} + +/** + * Parse the expression specified on the command line. + */ +static struct bfs_expr *parse_expr(struct parser_state *state); + +/** + * Advance by a single token. + */ +static char **parser_advance(struct parser_state *state, enum token_type type, size_t argc) { + if (type != T_FLAG && type != T_PATH) { + state->expr_started = true; + } + + if (type != T_PATH) { + state->last_arg = state->argv; + } + + char **argv = state->argv; + state->argv += argc; + return argv; +} + +/** + * Parse a root path. + */ +static int parse_root(struct parser_state *state, const char *path) { + char *copy = strdup(path); + if (!copy) { + parse_perror(state, "strdup()"); + return -1; + } + + struct bfs_ctx *ctx = state->ctx; + if (DARRAY_PUSH(&ctx->paths, ©) != 0) { + parse_perror(state, "DARRAY_PUSH()"); + free(copy); + return -1; + } + + state->implicit_root = false; + return 0; +} + +/** + * While parsing an expression, skip any paths and add them to ctx->paths. + */ +static int skip_paths(struct parser_state *state) { + while (true) { + const char *arg = state->argv[0]; + if (!arg) { + return 0; + } + + if (arg[0] == '-') { + if (strcmp(arg, "--") == 0) { + // find uses -- to separate flags from the rest + // of the command line. We allow mixing flags + // and paths/predicates, so we just ignore --. + parser_advance(state, T_FLAG, 1); + continue; + } + if (strcmp(arg, "-") != 0) { + // - by itself is a file name. Anything else + // starting with - is a flag/predicate. + return 0; + } + } + + // By POSIX, these are always options + if (strcmp(arg, "(") == 0 || strcmp(arg, "!") == 0) { + return 0; + } + + if (state->expr_started) { + // By POSIX, these can be paths. We only treat them as + // such at the beginning of the command line. + if (strcmp(arg, ")") == 0 || strcmp(arg, ",") == 0) { + return 0; + } + } + + if (parse_root(state, arg) != 0) { + return -1; + } + + parser_advance(state, T_PATH, 1); + } +} + +/** Integer parsing flags. */ +enum int_flags { + IF_BASE_MASK = 0x03F, + IF_INT = 0x040, + IF_LONG = 0x080, + IF_LONG_LONG = 0x0C0, + IF_SIZE_MASK = 0x0C0, + IF_UNSIGNED = 0x100, + IF_PARTIAL_OK = 0x200, + IF_QUIET = 0x400, +}; + +/** + * Parse an integer. + */ +static const char *parse_int(const struct parser_state *state, char **arg, const char *str, void *result, enum int_flags flags) { + char *endptr; + + int base = flags & IF_BASE_MASK; + if (base == 0) { + base = 10; + } + + errno = 0; + long long value = strtoll(str, &endptr, base); + if (errno != 0) { + if (errno == ERANGE) { + goto range; + } else { + goto bad; + } + } + + if (endptr == str) { + goto bad; + } + + if (!(flags & IF_PARTIAL_OK) && *endptr != '\0') { + goto bad; + } + + if ((flags & IF_UNSIGNED) && value < 0) { + goto negative; + } + + switch (flags & IF_SIZE_MASK) { + case IF_INT: + if (value < INT_MIN || value > INT_MAX) { + goto range; + } + *(int *)result = value; + break; + + case IF_LONG: + if (value < LONG_MIN || value > LONG_MAX) { + goto range; + } + *(long *)result = value; + break; + + case IF_LONG_LONG: + *(long long *)result = value; + break; + + default: + assert(!"Invalid int size"); + goto bad; + } + + return endptr; + +bad: + if (!(flags & IF_QUIET)) { + parse_argv_error(state, arg, 1, "${bld}%s${rs} is not a valid integer.\n", str); + } + return NULL; + +negative: + if (!(flags & IF_QUIET)) { + parse_argv_error(state, arg, 1, "Negative integer ${bld}%s${rs} is not allowed here.\n", str); + } + return NULL; + +range: + if (!(flags & IF_QUIET)) { + parse_argv_error(state, arg, 1, "${bld}%s${rs} is too large an integer.\n", str); + } + return NULL; +} + +/** + * Parse an integer and a comparison flag. + */ +static const char *parse_icmp(const struct parser_state *state, struct bfs_expr *expr, enum int_flags flags) { + char **arg = &expr->argv[1]; + const char *str = *arg; + switch (str[0]) { + case '-': + expr->int_cmp = BFS_INT_LESS; + ++str; + break; + case '+': + expr->int_cmp = BFS_INT_GREATER; + ++str; + break; + default: + expr->int_cmp = BFS_INT_EQUAL; + break; + } + + return parse_int(state, arg, str, &expr->num, flags | IF_LONG_LONG | IF_UNSIGNED); +} + +/** + * Check if a string could be an integer comparison. + */ +static bool looks_like_icmp(const char *str) { + int i; + + // One +/- for the comparison flag, one for the sign + for (i = 0; i < 2; ++i) { + if (str[i] != '-' && str[i] != '+') { + break; + } + } + + return str[i] >= '0' && str[i] <= '9'; +} + +/** + * Parse a single flag. + */ +static struct bfs_expr *parse_flag(struct parser_state *state, size_t argc) { + parser_advance(state, T_FLAG, argc); + return &bfs_true; +} + +/** + * Parse a flag that doesn't take a value. + */ +static struct bfs_expr *parse_nullary_flag(struct parser_state *state) { + return parse_flag(state, 1); +} + +/** + * Parse a single option. + */ +static struct bfs_expr *parse_option(struct parser_state *state, size_t argc) { + parser_advance(state, T_OPTION, argc); + return &bfs_true; +} + +/** + * Parse an option that doesn't take a value. + */ +static struct bfs_expr *parse_nullary_option(struct parser_state *state) { + return parse_option(state, 1); +} + +/** + * Parse an option that takes a value. + */ +static struct bfs_expr *parse_unary_option(struct parser_state *state) { + return parse_option(state, 2); +} + +/** + * Parse a single test. + */ +static struct bfs_expr *parse_test(struct parser_state *state, bfs_eval_fn *eval_fn, size_t argc) { + char **argv = parser_advance(state, T_TEST, argc); + struct bfs_expr *expr = bfs_expr_new(eval_fn, argc, argv); + if (expr) { + expr->pure = true; + } + return expr; +} + +/** + * Parse a test that doesn't take a value. + */ +static struct bfs_expr *parse_nullary_test(struct parser_state *state, bfs_eval_fn *eval_fn) { + return parse_test(state, eval_fn, 1); +} + +/** + * Parse a test that takes a value. + */ +static struct bfs_expr *parse_unary_test(struct parser_state *state, bfs_eval_fn *eval_fn) { + const char *arg = state->argv[0]; + const char *value = state->argv[1]; + if (!value) { + parse_error(state, "${blu}%s${rs} needs a value.\n", arg); + return NULL; + } + + return parse_test(state, eval_fn, 2); +} + +/** + * Parse a single action. + */ +static struct bfs_expr *parse_action(struct parser_state *state, bfs_eval_fn *eval_fn, size_t argc) { + char **argv = parser_advance(state, T_ACTION, argc); + + if (state->excluding) { + parse_argv_error(state, argv, argc, "This action is not supported within ${red}-exclude${rs}.\n"); + return NULL; + } + + if (eval_fn != eval_prune && eval_fn != eval_quit) { + state->implicit_print = false; + } + + return bfs_expr_new(eval_fn, argc, argv); +} + +/** + * Parse an action that takes no arguments. + */ +static struct bfs_expr *parse_nullary_action(struct parser_state *state, bfs_eval_fn *eval_fn) { + return parse_action(state, eval_fn, 1); +} + +/** + * Parse an action that takes one argument. + */ +static struct bfs_expr *parse_unary_action(struct parser_state *state, bfs_eval_fn *eval_fn) { + const char *arg = state->argv[0]; + const char *value = state->argv[1]; + if (!value) { + parse_error(state, "${blu}%s${rs} needs a value.\n", arg); + return NULL; + } + + return parse_action(state, eval_fn, 2); +} + +/** + * Add an expression to the exclusions. + */ +static int parse_exclude(struct parser_state *state, struct bfs_expr *expr) { + struct bfs_ctx *ctx = state->ctx; + ctx->exclude = new_binary_expr(eval_or, ctx->exclude, expr, &fake_or_arg); + if (ctx->exclude) { + return 0; + } else { + return -1; + } +} + +/** + * Parse a test expression with integer data and a comparison flag. + */ +static struct bfs_expr *parse_test_icmp(struct parser_state *state, bfs_eval_fn *eval_fn) { + struct bfs_expr *expr = parse_unary_test(state, eval_fn); + if (!expr) { + return NULL; + } + + if (!parse_icmp(state, expr, 0)) { + bfs_expr_free(expr); + return NULL; + } + + return expr; +} + +/** + * Print usage information for -D. + */ +static void debug_help(CFILE *cfile) { + cfprintf(cfile, "Supported debug flags:\n\n"); + + cfprintf(cfile, " ${bld}help${rs}: This message.\n"); + cfprintf(cfile, " ${bld}cost${rs}: Show cost estimates.\n"); + cfprintf(cfile, " ${bld}exec${rs}: Print executed command details.\n"); + cfprintf(cfile, " ${bld}opt${rs}: Print optimization details.\n"); + cfprintf(cfile, " ${bld}rates${rs}: Print predicate success rates.\n"); + cfprintf(cfile, " ${bld}search${rs}: Trace the filesystem traversal.\n"); + cfprintf(cfile, " ${bld}stat${rs}: Trace all stat() calls.\n"); + cfprintf(cfile, " ${bld}tree${rs}: Print the parse tree.\n"); + cfprintf(cfile, " ${bld}all${rs}: All debug flags at once.\n"); +} + +/** Check if a substring matches a debug flag. */ +static bool parse_debug_flag(const char *flag, size_t len, const char *expected) { + if (len == strlen(expected)) { + return strncmp(flag, expected, len) == 0; + } else { + return false; + } +} + +/** + * Parse -D FLAG. + */ +static struct bfs_expr *parse_debug(struct parser_state *state, int arg1, int arg2) { + struct bfs_ctx *ctx = state->ctx; + + const char *arg = state->argv[0]; + const char *flags = state->argv[1]; + if (!flags) { + parse_error(state, "${cyn}%s${rs} needs a flag.\n\n", arg); + debug_help(ctx->cerr); + return NULL; + } + + parser_advance(state, T_FLAG, 1); + + bool unrecognized = false; + + for (const char *flag = flags, *next; flag; flag = next) { + size_t len = strcspn(flag, ","); + if (flag[len]) { + next = flag + len + 1; + } else { + next = NULL; + } + + if (parse_debug_flag(flag, len, "help")) { + debug_help(ctx->cout); + state->just_info = true; + return NULL; + } else if (parse_debug_flag(flag, len, "all")) { + ctx->debug = DEBUG_ALL; + continue; + } + + enum debug_flags i; + for (i = 1; DEBUG_ALL & i; i <<= 1) { + const char *name = debug_flag_name(i); + if (parse_debug_flag(flag, len, name)) { + break; + } + } + + if (DEBUG_ALL & i) { + ctx->debug |= i; + } else { + if (parse_warning(state, "Unrecognized debug flag ${bld}")) { + fwrite(flag, 1, len, stderr); + cfprintf(ctx->cerr, "${rs}.\n\n"); + unrecognized = true; + } + } + } + + if (unrecognized) { + debug_help(ctx->cerr); + cfprintf(ctx->cerr, "\n"); + } + + parser_advance(state, T_FLAG, 1); + return &bfs_true; +} + +/** + * Parse -On. + */ +static struct bfs_expr *parse_optlevel(struct parser_state *state, int arg1, int arg2) { + int *optlevel = &state->ctx->optlevel; + + if (strcmp(state->argv[0], "-Ofast") == 0) { + *optlevel = 4; + } else if (!parse_int(state, state->argv, state->argv[0] + 2, optlevel, IF_INT | IF_UNSIGNED)) { + return NULL; + } + + if (*optlevel > 4) { + parse_warning(state, "${cyn}-O${bld}%s${rs} is the same as ${cyn}-O${bld}4${rs}.\n\n", state->argv[0] + 2); + } + + return parse_nullary_flag(state); +} + +/** + * Parse -[PHL], -follow. + */ +static struct bfs_expr *parse_follow(struct parser_state *state, int flags, int option) { + struct bfs_ctx *ctx = state->ctx; + ctx->flags &= ~(BFTW_FOLLOW_ROOTS | BFTW_FOLLOW_ALL); + ctx->flags |= flags; + if (option) { + return parse_nullary_option(state); + } else { + return parse_nullary_flag(state); + } +} + +/** + * Parse -X. + */ +static struct bfs_expr *parse_xargs_safe(struct parser_state *state, int arg1, int arg2) { + state->ctx->xargs_safe = true; + return parse_nullary_flag(state); +} + +/** + * Parse -executable, -readable, -writable + */ +static struct bfs_expr *parse_access(struct parser_state *state, int flag, int arg2) { + struct bfs_expr *expr = parse_nullary_test(state, eval_access); + if (!expr) { + return NULL; + } + + expr->num = flag; + expr->cost = STAT_COST; + + switch (flag) { + case R_OK: + expr->probability = 0.99; + break; + case W_OK: + expr->probability = 0.8; + break; + case X_OK: + expr->probability = 0.2; + break; + } + + return expr; +} + +/** + * Parse -acl. + */ +static struct bfs_expr *parse_acl(struct parser_state *state, int flag, int arg2) { +#if BFS_CAN_CHECK_ACL + struct bfs_expr *expr = parse_nullary_test(state, eval_acl); + if (expr) { + expr->cost = STAT_COST; + expr->probability = 0.00002; + } + return expr; +#else + parse_error(state, "Missing platform support.\n"); + return NULL; +#endif +} + +/** + * Parse -[aBcm]?newer. + */ +static struct bfs_expr *parse_newer(struct parser_state *state, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_newer); + if (!expr) { + return NULL; + } + + struct bfs_stat sb; + if (stat_arg(state, &expr->argv[1], &sb) != 0) { + goto fail; + } + + expr->cost = STAT_COST; + expr->reftime = sb.mtime; + expr->stat_field = field; + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -[aBcm]min. + */ +static struct bfs_expr *parse_min(struct parser_state *state, int field, int arg2) { + struct bfs_expr *expr = parse_test_icmp(state, eval_time); + if (!expr) { + return NULL; + } + + expr->cost = STAT_COST; + expr->reftime = state->now; + expr->stat_field = field; + expr->time_unit = BFS_MINUTES; + return expr; +} + +/** + * Parse -[aBcm]time. + */ +static struct bfs_expr *parse_time(struct parser_state *state, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_time); + if (!expr) { + return NULL; + } + + expr->cost = STAT_COST; + expr->reftime = state->now; + expr->stat_field = field; + + const char *tail = parse_icmp(state, expr, IF_PARTIAL_OK); + if (!tail) { + goto fail; + } + + if (!*tail) { + expr->time_unit = BFS_DAYS; + return expr; + } + + unsigned long long time = expr->num; + expr->num = 0; + + while (true) { + switch (*tail) { + case 'w': + time *= 7; + BFS_FALLTHROUGH; + case 'd': + time *= 24; + BFS_FALLTHROUGH; + case 'h': + time *= 60; + BFS_FALLTHROUGH; + case 'm': + time *= 60; + BFS_FALLTHROUGH; + case 's': + break; + default: + parse_expr_error(state, expr, "Unknown time unit ${bld}%c${rs}.\n", *tail); + goto fail; + } + + expr->num += time; + + if (!*++tail) { + break; + } + + tail = parse_int(state, &expr->argv[1], tail, &time, IF_PARTIAL_OK | IF_LONG_LONG | IF_UNSIGNED); + if (!tail) { + goto fail; + } + if (!*tail) { + parse_expr_error(state, expr, "Missing time unit.\n"); + goto fail; + } + } + + expr->time_unit = BFS_SECONDS; + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -capable. + */ +static struct bfs_expr *parse_capable(struct parser_state *state, int flag, int arg2) { +#if BFS_CAN_CHECK_CAPABILITIES + struct bfs_expr *expr = parse_nullary_test(state, eval_capable); + if (expr) { + expr->cost = STAT_COST; + expr->probability = 0.000002; + } + return expr; +#else + parse_error(state, "Missing platform support.\n"); + return NULL; +#endif +} + +/** + * Parse -(no)?color. + */ +static struct bfs_expr *parse_color(struct parser_state *state, int color, int arg2) { + struct bfs_ctx *ctx = state->ctx; + struct colors *colors = ctx->colors; + + if (color) { + if (!colors) { + parse_error(state, "%s.\n", strerror(ctx->colors_error)); + return NULL; + } + + state->use_color = COLOR_ALWAYS; + ctx->cout->colors = colors; + ctx->cerr->colors = colors; + } else { + state->use_color = COLOR_NEVER; + ctx->cout->colors = NULL; + ctx->cerr->colors = NULL; + } + + return parse_nullary_option(state); +} + +/** + * Parse -{false,true}. + */ +static struct bfs_expr *parse_const(struct parser_state *state, int value, int arg2) { + parser_advance(state, T_TEST, 1); + return value ? &bfs_true : &bfs_false; +} + +/** + * Parse -daystart. + */ +static struct bfs_expr *parse_daystart(struct parser_state *state, int arg1, int arg2) { + struct tm tm; + if (xlocaltime(&state->now.tv_sec, &tm) != 0) { + parse_perror(state, "xlocaltime()"); + return NULL; + } + + if (tm.tm_hour || tm.tm_min || tm.tm_sec || state->now.tv_nsec) { + ++tm.tm_mday; + } + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + + time_t time; + if (xmktime(&tm, &time) != 0) { + parse_perror(state, "xmktime()"); + return NULL; + } + + state->now.tv_sec = time; + state->now.tv_nsec = 0; + + return parse_nullary_option(state); +} + +/** + * Parse -delete. + */ +static struct bfs_expr *parse_delete(struct parser_state *state, int arg1, int arg2) { + state->ctx->flags |= BFTW_POST_ORDER; + state->depth_arg = state->argv; + return parse_nullary_action(state, eval_delete); +} + +/** + * Parse -d. + */ +static struct bfs_expr *parse_depth(struct parser_state *state, int arg1, int arg2) { + state->ctx->flags |= BFTW_POST_ORDER; + state->depth_arg = state->argv; + return parse_nullary_flag(state); +} + +/** + * Parse -depth [N]. + */ +static struct bfs_expr *parse_depth_n(struct parser_state *state, int arg1, int arg2) { + const char *arg = state->argv[1]; + if (arg && looks_like_icmp(arg)) { + return parse_test_icmp(state, eval_depth); + } else { + return parse_depth(state, arg1, arg2); + } +} + +/** + * Parse -{min,max}depth N. + */ +static struct bfs_expr *parse_depth_limit(struct parser_state *state, int is_min, int arg2) { + struct bfs_ctx *ctx = state->ctx; + const char *arg = state->argv[0]; + const char *value = state->argv[1]; + if (!value) { + parse_error(state, "${blu}%s${rs} needs a value.\n", arg); + return NULL; + } + + int *depth = is_min ? &ctx->mindepth : &ctx->maxdepth; + if (!parse_int(state, &state->argv[1], value, depth, IF_INT | IF_UNSIGNED)) { + return NULL; + } + + return parse_unary_option(state); +} + +/** + * Parse -empty. + */ +static struct bfs_expr *parse_empty(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_test(state, eval_empty); + if (!expr) { + return NULL; + } + + expr->cost = 2000.0; + expr->probability = 0.01; + + if (state->ctx->optlevel < 4) { + // Since -empty attempts to open and read directories, it may + // have side effects such as reporting permission errors, and + // thus shouldn't be re-ordered without aggressive optimizations + expr->pure = false; + } + + expr->ephemeral_fds = 1; + + return expr; +} + +/** + * Parse -exec(dir)?/-ok(dir)?. + */ +static struct bfs_expr *parse_exec(struct parser_state *state, int flags, int arg2) { + struct bfs_exec *execbuf = bfs_exec_parse(state->ctx, state->argv, flags); + if (!execbuf) { + return NULL; + } + + struct bfs_expr *expr = parse_action(state, eval_exec, execbuf->tmpl_argc + 2); + if (!expr) { + bfs_exec_free(execbuf); + return NULL; + } + + expr->exec = execbuf; + + if (execbuf->flags & BFS_EXEC_MULTI) { + expr_set_always_true(expr); + } else { + expr->cost = 1000000.0; + } + + expr->ephemeral_fds = 2; + if (execbuf->flags & BFS_EXEC_CHDIR) { + if (execbuf->flags & BFS_EXEC_MULTI) { + expr->persistent_fds = 1; + } else { + ++expr->ephemeral_fds; + } + } + + if (execbuf->flags & BFS_EXEC_CONFIRM) { + state->ok_expr = expr; + } + + return expr; +} + +/** + * Parse -exit [STATUS]. + */ +static struct bfs_expr *parse_exit(struct parser_state *state, int arg1, int arg2) { + size_t argc = 1; + const char *value = state->argv[1]; + + int status = EXIT_SUCCESS; + if (value && parse_int(state, NULL, value, &status, IF_INT | IF_UNSIGNED | IF_QUIET)) { + argc = 2; + } + + struct bfs_expr *expr = parse_action(state, eval_exit, argc); + if (expr) { + expr_set_never_returns(expr); + expr->num = status; + } + return expr; +} + +/** + * Parse -f PATH. + */ +static struct bfs_expr *parse_f(struct parser_state *state, int arg1, int arg2) { + const char *path = state->argv[1]; + if (!path) { + parse_error(state, "${cyn}-f${rs} requires a path.\n"); + return NULL; + } + + if (parse_root(state, path) != 0) { + return NULL; + } + + parser_advance(state, T_FLAG, 1); + parser_advance(state, T_PATH, 1); + return &bfs_true; +} + +/** + * Parse -files0-from PATH. + */ +static struct bfs_expr *parse_files0_from(struct parser_state *state, int arg1, int arg2) { + const char *arg = state->argv[0]; + const char *from = state->argv[1]; + if (!from) { + parse_error(state, "${blu}%s${rs} requires a path.\n", arg); + return NULL; + } + + state->files0_arg = parser_advance(state, T_OPTION, 1); + + FILE *file; + if (strcmp(from, "-") == 0) { + file = stdin; + } else { + file = xfopen(from, O_RDONLY | O_CLOEXEC); + } + if (!file) { + parse_error(state, "%m.\n"); + return NULL; + } + + struct bfs_expr *expr = &bfs_true; + + while (true) { + char *path = xgetdelim(file, '\0'); + if (!path) { + if (errno) { + parse_error(state, "%m.\n"); + expr = NULL; + } + break; + } + + int ret = parse_root(state, path); + free(path); + if (ret != 0) { + expr = NULL; + break; + } + } + + if (file == stdin) { + state->files0_stdin_arg = state->files0_arg; + } else { + fclose(file); + } + + state->implicit_root = false; + parser_advance(state, T_OPTION, 1); + return expr; +} + +/** + * Parse -flags FLAGS. + */ +static struct bfs_expr *parse_flags(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_flags); + if (!expr) { + return NULL; + } + + const char *flags = expr->argv[1]; + switch (flags[0]) { + case '-': + expr->flags_cmp = BFS_MODE_ALL; + ++flags; + break; + case '+': + expr->flags_cmp = BFS_MODE_ANY; + ++flags; + break; + default: + expr->flags_cmp = BFS_MODE_EQUAL; + break; + } + + if (xstrtofflags(&flags, &expr->set_flags, &expr->clear_flags) != 0) { + if (errno == ENOTSUP) { + parse_expr_error(state, expr, "Missing platform support.\n"); + } else { + parse_expr_error(state, expr, "Invalid flags.\n"); + } + bfs_expr_free(expr); + return NULL; + } + + return expr; +} + +/** + * Parse -fls FILE. + */ +static struct bfs_expr *parse_fls(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(state, eval_fls); + if (!expr) { + goto fail; + } + + if (expr_open(state, expr, expr->argv[1]) != 0) { + goto fail; + } + + expr_set_always_true(expr); + expr->cost = PRINT_COST; + expr->reftime = state->now; + + // We'll need these for user/group names, so initialize them now to + // avoid EMFILE later + bfs_ctx_users(state->ctx); + bfs_ctx_groups(state->ctx); + + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -fprint FILE. + */ +static struct bfs_expr *parse_fprint(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(state, eval_fprint); + if (expr) { + expr_set_always_true(expr); + expr->cost = PRINT_COST; + if (expr_open(state, expr, expr->argv[1]) != 0) { + goto fail; + } + } + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -fprint0 FILE. + */ +static struct bfs_expr *parse_fprint0(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(state, eval_fprint0); + if (expr) { + expr_set_always_true(expr); + expr->cost = PRINT_COST; + if (expr_open(state, expr, expr->argv[1]) != 0) { + goto fail; + } + } + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -fprintf FILE FORMAT. + */ +static struct bfs_expr *parse_fprintf(struct parser_state *state, int arg1, int arg2) { + const char *arg = state->argv[0]; + + const char *file = state->argv[1]; + if (!file) { + parse_error(state, "${blu}%s${rs} needs a file.\n", arg); + return NULL; + } + + const char *format = state->argv[2]; + if (!format) { + parse_error(state, "${blu}%s${rs} needs a format string.\n", arg); + return NULL; + } + + struct bfs_expr *expr = parse_action(state, eval_fprintf, 3); + if (!expr) { + return NULL; + } + + expr_set_always_true(expr); + + expr->cost = PRINT_COST; + + if (expr_open(state, expr, file) != 0) { + goto fail; + } + + if (bfs_printf_parse(state->ctx, expr, format) != 0) { + goto fail; + } + + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -fstype TYPE. + */ +static struct bfs_expr *parse_fstype(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_fstype); + if (!expr) { + return NULL; + } + + if (!bfs_ctx_mtab(state->ctx)) { + parse_expr_error(state, expr, "Couldn't parse the mount table: %m.\n"); + bfs_expr_free(expr); + return NULL; + } + + expr->cost = STAT_COST; + return expr; +} + +/** + * Parse -gid/-group. + */ +static struct bfs_expr *parse_group(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_gid); + if (!expr) { + return NULL; + } + + const struct bfs_groups *groups = bfs_ctx_groups(state->ctx); + if (!groups) { + parse_expr_error(state, expr, "Couldn't parse the group table: %m.\n"); + goto fail; + } + + const struct group *grp = bfs_getgrnam(groups, expr->argv[1]); + if (grp) { + expr->num = grp->gr_gid; + expr->int_cmp = BFS_INT_EQUAL; + } else if (looks_like_icmp(expr->argv[1])) { + if (!parse_icmp(state, expr, 0)) { + goto fail; + } + } else { + parse_expr_error(state, expr, "No such group.\n"); + goto fail; + } + + expr->cost = STAT_COST; + + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -unique. + */ +static struct bfs_expr *parse_unique(struct parser_state *state, int arg1, int arg2) { + state->ctx->unique = true; + return parse_nullary_option(state); +} + +/** + * Parse -used N. + */ +static struct bfs_expr *parse_used(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_test_icmp(state, eval_used); + if (expr) { + expr->cost = STAT_COST; + } + return expr; +} + +/** + * Parse -uid/-user. + */ +static struct bfs_expr *parse_user(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_uid); + if (!expr) { + return NULL; + } + + const struct bfs_users *users = bfs_ctx_users(state->ctx); + if (!users) { + parse_expr_error(state, expr, "Couldn't parse the user table: %m.\n"); + goto fail; + } + + const struct passwd *pwd = bfs_getpwnam(users, expr->argv[1]); + if (pwd) { + expr->num = pwd->pw_uid; + expr->int_cmp = BFS_INT_EQUAL; + } else if (looks_like_icmp(expr->argv[1])) { + if (!parse_icmp(state, expr, 0)) { + goto fail; + } + } else { + parse_expr_error(state, expr, "No such user.\n"); + goto fail; + } + + expr->cost = STAT_COST; + + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -hidden. + */ +static struct bfs_expr *parse_hidden(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_test(state, eval_hidden); + if (expr) { + expr->probability = 0.01; + } + return expr; +} + +/** + * Parse -(no)?ignore_readdir_race. + */ +static struct bfs_expr *parse_ignore_races(struct parser_state *state, int ignore, int arg2) { + state->ctx->ignore_races = ignore; + return parse_nullary_option(state); +} + +/** + * Parse -inum N. + */ +static struct bfs_expr *parse_inum(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_test_icmp(state, eval_inum); + if (expr) { + expr->cost = STAT_COST; + expr->probability = expr->int_cmp == BFS_INT_EQUAL ? 0.01 : 0.50; + } + return expr; +} + +/** + * Parse -links N. + */ +static struct bfs_expr *parse_links(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_test_icmp(state, eval_links); + if (expr) { + expr->cost = STAT_COST; + expr->probability = bfs_expr_cmp(expr, 1) ? 0.99 : 0.01; + } + return expr; +} + +/** + * Parse -ls. + */ +static struct bfs_expr *parse_ls(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(state, eval_fls); + if (!expr) { + return NULL; + } + + init_print_expr(state, expr); + expr->reftime = state->now; + + // We'll need these for user/group names, so initialize them now to + // avoid EMFILE later + bfs_ctx_users(state->ctx); + bfs_ctx_groups(state->ctx); + + return expr; +} + +/** + * Parse -mount. + */ +static struct bfs_expr *parse_mount(struct parser_state *state, int arg1, int arg2) { + parse_warning(state, "In the future, ${blu}%s${rs} will skip mount points entirely, unlike\n", state->argv[0]); + bfs_warning(state->ctx, "${blu}-xdev${rs}, due to http://austingroupbugs.net/view.php?id=1133.\n\n"); + + state->ctx->flags |= BFTW_PRUNE_MOUNTS; + state->mount_arg = state->argv; + return parse_nullary_option(state); +} + +/** + * Common code for fnmatch() tests. + */ +static struct bfs_expr *parse_fnmatch(const struct parser_state *state, struct bfs_expr *expr, bool casefold) { + if (!expr) { + return NULL; + } + + if (casefold) { +#ifdef FNM_CASEFOLD + expr->num = FNM_CASEFOLD; +#else + parse_expr_error(state, expr, "Missing platform support.\n"); + bfs_expr_free(expr); + return NULL; +#endif + } else { + expr->num = 0; + } + + // POSIX says, about fnmatch(): + // + // If pattern ends with an unescaped <backslash>, fnmatch() shall + // return a non-zero value (indicating either no match or an error). + // + // But not all implementations obey this, so check for it ourselves. + const char *pattern = expr->argv[1]; + size_t i, len = strlen(pattern); + for (i = 0; i < len; ++i) { + if (pattern[len - i - 1] != '\\') { + break; + } + } + if (i % 2 != 0) { + parse_expr_warning(state, expr, "Unescaped trailing backslash.\n\n"); + bfs_expr_free(expr); + return &bfs_false; + } + + expr->cost = 400.0; + + if (strchr(pattern, '*')) { + expr->probability = 0.5; + } else { + expr->probability = 0.1; + } + + return expr; +} + +/** + * Parse -i?name. + */ +static struct bfs_expr *parse_name(struct parser_state *state, int casefold, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_name); + return parse_fnmatch(state, expr, casefold); +} + +/** + * Parse -i?path, -i?wholename. + */ +static struct bfs_expr *parse_path(struct parser_state *state, int casefold, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_path); + return parse_fnmatch(state, expr, casefold); +} + +/** + * Parse -i?lname. + */ +static struct bfs_expr *parse_lname(struct parser_state *state, int casefold, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_lname); + return parse_fnmatch(state, expr, casefold); +} + +/** Get the bfs_stat_field for X/Y in -newerXY. */ +static enum bfs_stat_field parse_newerxy_field(char c) { + switch (c) { + case 'a': + return BFS_STAT_ATIME; + case 'B': + return BFS_STAT_BTIME; + case 'c': + return BFS_STAT_CTIME; + case 'm': + return BFS_STAT_MTIME; + default: + return 0; + } +} + +/** Parse an explicit reference timestamp for -newerXt and -*since. */ +static int parse_reftime(const struct parser_state *state, struct bfs_expr *expr) { + if (parse_timestamp(expr->argv[1], &expr->reftime) == 0) { + return 0; + } else if (errno != EINVAL) { + parse_expr_error(state, expr, "%m.\n"); + return -1; + } + + parse_expr_error(state, expr, "Invalid timestamp.\n\n"); + fprintf(stderr, "Supported timestamp formats are ISO 8601-like, e.g.\n\n"); + + struct tm tm; + if (xlocaltime(&state->now.tv_sec, &tm) != 0) { + parse_perror(state, "xlocaltime()"); + return -1; + } + + int year = tm.tm_year + 1900; + int month = tm.tm_mon + 1; + fprintf(stderr, " - %04d-%02d-%02d\n", year, month, tm.tm_mday); + fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02d\n", year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); + +#if __FreeBSD__ + int gmtoff = tm.tm_gmtoff; +#else + int gmtoff = -timezone; +#endif + int tz_hour = gmtoff/3600; + int tz_min = (labs(gmtoff)/60)%60; + fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02d%+03d:%02d\n", + year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, tz_hour, tz_min); + + if (xgmtime(&state->now.tv_sec, &tm) != 0) { + parse_perror(state, "xgmtime()"); + return -1; + } + + year = tm.tm_year + 1900; + month = tm.tm_mon + 1; + fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02dZ\n", year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); + + return -1; +} + +/** + * Parse -newerXY. + */ +static struct bfs_expr *parse_newerxy(struct parser_state *state, int arg1, int arg2) { + const char *arg = state->argv[0]; + if (strlen(arg) != 8) { + parse_error(state, "Expected ${blu}-newer${bld}XY${rs}; found ${blu}-newer${bld}%s${rs}.\n", arg + 6); + return NULL; + } + + struct bfs_expr *expr = parse_unary_test(state, eval_newer); + if (!expr) { + goto fail; + } + + expr->stat_field = parse_newerxy_field(arg[6]); + if (!expr->stat_field) { + parse_expr_error(state, expr, + "For ${blu}-newer${bld}XY${rs}, ${bld}X${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, or ${bld}B${rs}, not ${err}%c${rs}.\n", + arg[6]); + goto fail; + } + + if (arg[7] == 't') { + if (parse_reftime(state, expr) != 0) { + goto fail; + } + } else { + enum bfs_stat_field field = parse_newerxy_field(arg[7]); + if (!field) { + parse_expr_error(state, expr, + "For ${blu}-newer${bld}XY${rs}, ${bld}Y${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, ${bld}B${rs}, or ${bld}t${rs}, not ${err}%c${rs}.\n", + arg[7]); + goto fail; + } + + struct bfs_stat sb; + if (stat_arg(state, &expr->argv[1], &sb) != 0) { + goto fail; + } + + + const struct timespec *reftime = bfs_stat_time(&sb, field); + if (!reftime) { + parse_expr_error(state, expr, "Couldn't get file %s.\n", bfs_stat_field_name(field)); + goto fail; + } + + expr->reftime = *reftime; + } + + expr->cost = STAT_COST; + + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -nogroup. + */ +static struct bfs_expr *parse_nogroup(struct parser_state *state, int arg1, int arg2) { + if (!bfs_ctx_groups(state->ctx)) { + parse_error(state, "Couldn't parse the group table: %m.\n"); + return NULL; + } + + struct bfs_expr *expr = parse_nullary_test(state, eval_nogroup); + if (expr) { + expr->cost = STAT_COST; + expr->probability = 0.01; + } + return expr; +} + +/** + * Parse -nohidden. + */ +static struct bfs_expr *parse_nohidden(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *hidden = bfs_expr_new(eval_hidden, 1, &fake_hidden_arg); + if (!hidden) { + return NULL; + } + + hidden->probability = 0.01; + hidden->pure = true; + hidden->synthetic = true; + + if (parse_exclude(state, hidden) != 0) { + return NULL; + } + + parser_advance(state, T_OPTION, 1); + return &bfs_true; +} + +/** + * Parse -noleaf. + */ +static struct bfs_expr *parse_noleaf(struct parser_state *state, int arg1, int arg2) { + parse_warning(state, "${ex}bfs${rs} does not apply the optimization that ${blu}%s${rs} inhibits.\n\n", state->argv[0]); + return parse_nullary_option(state); +} + +/** + * Parse -nouser. + */ +static struct bfs_expr *parse_nouser(struct parser_state *state, int arg1, int arg2) { + if (!bfs_ctx_users(state->ctx)) { + parse_error(state, "Couldn't parse the user table: %m.\n"); + return NULL; + } + + struct bfs_expr *expr = parse_nullary_test(state, eval_nouser); + if (expr) { + expr->cost = STAT_COST; + expr->probability = 0.01; + } + return expr; +} + +/** + * Parse a permission mode like chmod(1). + */ +static int parse_mode(const struct parser_state *state, const char *mode, struct bfs_expr *expr) { + if (mode[0] >= '0' && mode[0] <= '9') { + unsigned int parsed; + if (!parse_int(state, NULL, mode, &parsed, 8 | IF_INT | IF_UNSIGNED | IF_QUIET)) { + goto fail; + } + if (parsed > 07777) { + goto fail; + } + + expr->file_mode = parsed; + expr->dir_mode = parsed; + return 0; + } + + expr->file_mode = 0; + expr->dir_mode = 0; + + // Parse the same grammar as chmod(1), which looks like this: + // + // MODE : CLAUSE ["," CLAUSE]* + // + // CLAUSE : WHO* ACTION+ + // + // WHO : "u" | "g" | "o" | "a" + // + // ACTION : OP PERM* + // | OP PERMCOPY + // + // OP : "+" | "-" | "=" + // + // PERM : "r" | "w" | "x" | "X" | "s" | "t" + // + // PERMCOPY : "u" | "g" | "o" + + // State machine state + enum { + MODE_CLAUSE, + MODE_WHO, + MODE_ACTION, + MODE_ACTION_APPLY, + MODE_OP, + MODE_PERM, + } mstate = MODE_CLAUSE; + + enum { + MODE_PLUS, + MODE_MINUS, + MODE_EQUALS, + } op; + + mode_t who; + mode_t file_change; + mode_t dir_change; + + const char *i = mode; + while (true) { + switch (mstate) { + case MODE_CLAUSE: + who = 0; + mstate = MODE_WHO; + BFS_FALLTHROUGH; + + case MODE_WHO: + switch (*i) { + case 'u': + who |= 0700; + break; + case 'g': + who |= 0070; + break; + case 'o': + who |= 0007; + break; + case 'a': + who |= 0777; + break; + default: + mstate = MODE_ACTION; + continue; + } + break; + + case MODE_ACTION_APPLY: + switch (op) { + case MODE_EQUALS: + expr->file_mode &= ~who; + expr->dir_mode &= ~who; + BFS_FALLTHROUGH; + case MODE_PLUS: + expr->file_mode |= file_change; + expr->dir_mode |= dir_change; + break; + case MODE_MINUS: + expr->file_mode &= ~file_change; + expr->dir_mode &= ~dir_change; + break; + } + BFS_FALLTHROUGH; + + case MODE_ACTION: + if (who == 0) { + who = 0777; + } + + switch (*i) { + case '+': + op = MODE_PLUS; + mstate = MODE_OP; + break; + case '-': + op = MODE_MINUS; + mstate = MODE_OP; + break; + case '=': + op = MODE_EQUALS; + mstate = MODE_OP; + break; + + case ',': + if (mstate == MODE_ACTION_APPLY) { + mstate = MODE_CLAUSE; + } else { + goto fail; + } + break; + + case '\0': + if (mstate == MODE_ACTION_APPLY) { + goto done; + } else { + goto fail; + } + + default: + goto fail; + } + break; + + case MODE_OP: + switch (*i) { + case 'u': + file_change = (expr->file_mode >> 6) & 07; + dir_change = (expr->dir_mode >> 6) & 07; + break; + case 'g': + file_change = (expr->file_mode >> 3) & 07; + dir_change = (expr->dir_mode >> 3) & 07; + break; + case 'o': + file_change = expr->file_mode & 07; + dir_change = expr->dir_mode & 07; + break; + + default: + file_change = 0; + dir_change = 0; + mstate = MODE_PERM; + continue; + } + + file_change |= (file_change << 6) | (file_change << 3); + file_change &= who; + dir_change |= (dir_change << 6) | (dir_change << 3); + dir_change &= who; + mstate = MODE_ACTION_APPLY; + break; + + case MODE_PERM: + switch (*i) { + case 'r': + file_change |= who & 0444; + dir_change |= who & 0444; + break; + case 'w': + file_change |= who & 0222; + dir_change |= who & 0222; + break; + case 'x': + file_change |= who & 0111; + BFS_FALLTHROUGH; + case 'X': + dir_change |= who & 0111; + break; + case 's': + if (who & 0700) { + file_change |= S_ISUID; + dir_change |= S_ISUID; + } + if (who & 0070) { + file_change |= S_ISGID; + dir_change |= S_ISGID; + } + break; + case 't': + if (who & 0007) { + file_change |= S_ISVTX; + dir_change |= S_ISVTX; + } + break; + default: + mstate = MODE_ACTION_APPLY; + continue; + } + break; + } + + ++i; + } + +done: + return 0; + +fail: + parse_expr_error(state, expr, "Invalid mode.\n"); + return -1; +} + +/** + * Parse -perm MODE. + */ +static struct bfs_expr *parse_perm(struct parser_state *state, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_perm); + if (!expr) { + return NULL; + } + + const char *mode = expr->argv[1]; + switch (mode[0]) { + case '-': + expr->mode_cmp = BFS_MODE_ALL; + ++mode; + break; + case '/': + expr->mode_cmp = BFS_MODE_ANY; + ++mode; + break; + case '+': + if (mode[1] >= '0' && mode[1] <= '9') { + expr->mode_cmp = BFS_MODE_ANY; + ++mode; + break; + } + BFS_FALLTHROUGH; + default: + expr->mode_cmp = BFS_MODE_EQUAL; + break; + } + + if (parse_mode(state, mode, expr) != 0) { + goto fail; + } + + expr->cost = STAT_COST; + + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -print. + */ +static struct bfs_expr *parse_print(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(state, eval_fprint); + if (expr) { + init_print_expr(state, expr); + } + return expr; +} + +/** + * Parse -print0. + */ +static struct bfs_expr *parse_print0(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(state, eval_fprint0); + if (expr) { + init_print_expr(state, expr); + } + return expr; +} + +/** + * Parse -printf FORMAT. + */ +static struct bfs_expr *parse_printf(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(state, eval_fprintf); + if (!expr) { + return NULL; + } + + init_print_expr(state, expr); + + if (bfs_printf_parse(state->ctx, expr, expr->argv[1]) != 0) { + bfs_expr_free(expr); + return NULL; + } + + return expr; +} + +/** + * Parse -printx. + */ +static struct bfs_expr *parse_printx(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(state, eval_fprintx); + if (expr) { + init_print_expr(state, expr); + } + return expr; +} + +/** + * Parse -prune. + */ +static struct bfs_expr *parse_prune(struct parser_state *state, int arg1, int arg2) { + state->prune_arg = state->argv; + + struct bfs_expr *expr = parse_nullary_action(state, eval_prune); + if (expr) { + expr_set_always_true(expr); + } + return expr; +} + +/** + * Parse -quit. + */ +static struct bfs_expr *parse_quit(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(state, eval_quit); + if (expr) { + expr_set_never_returns(expr); + } + return expr; +} + +/** + * Parse -i?regex. + */ +static struct bfs_expr *parse_regex(struct parser_state *state, int flags, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_regex); + if (!expr) { + goto fail; + } + + if (bfs_regcomp(&expr->regex, expr->argv[1], state->regex_type, flags) != 0) { + if (!expr->regex) { + parse_perror(state, "bfs_regcomp()"); + goto fail; + } + + char *str = bfs_regerror(expr->regex); + if (!str) { + parse_perror(state, "bfs_regerror()"); + goto fail; + } + + parse_expr_error(state, expr, "%s.\n", str); + free(str); + goto fail; + } + + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -E. + */ +static struct bfs_expr *parse_regex_extended(struct parser_state *state, int arg1, int arg2) { + state->regex_type = BFS_REGEX_POSIX_EXTENDED; + return parse_nullary_flag(state); +} + +/** + * Parse -regextype TYPE. + */ +static struct bfs_expr *parse_regextype(struct parser_state *state, int arg1, int arg2) { + struct bfs_ctx *ctx = state->ctx; + CFILE *cfile = ctx->cerr; + + const char *arg = state->argv[0]; + const char *type = state->argv[1]; + if (!type) { + parse_error(state, "${blu}%s${rs} needs a value.\n\n", arg); + goto list_types; + } + + parser_advance(state, T_OPTION, 1); + + // See https://www.gnu.org/software/gnulib/manual/html_node/Predefined-Syntaxes.html + if (strcmp(type, "posix-basic") == 0 + || strcmp(type, "ed") == 0 + || strcmp(type, "sed") == 0) { + state->regex_type = BFS_REGEX_POSIX_BASIC; + } else if (strcmp(type, "posix-extended") == 0) { + state->regex_type = BFS_REGEX_POSIX_EXTENDED; +#if BFS_WITH_ONIGURUMA + } else if (strcmp(type, "emacs") == 0) { + state->regex_type = BFS_REGEX_EMACS; + } else if (strcmp(type, "grep") == 0) { + state->regex_type = BFS_REGEX_GREP; +#endif + } else if (strcmp(type, "help") == 0) { + state->just_info = true; + cfile = ctx->cout; + goto list_types; + } else { + parse_error(state, "Unsupported regex type.\n\n"); + goto list_types; + } + + parser_advance(state, T_OPTION, 1); + return &bfs_true; + +list_types: + cfprintf(cfile, "Supported types are:\n\n"); + cfprintf(cfile, " ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n"); + cfprintf(cfile, " ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n"); + cfprintf(cfile, " ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n"); +#if BFS_WITH_ONIGURUMA + cfprintf(cfile, " ${bld}emacs${rs}: Like ${grn}emacs${rs}\n"); + cfprintf(cfile, " ${bld}grep${rs}: Like ${grn}grep${rs}\n"); +#endif + cfprintf(cfile, " ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n"); + return NULL; +} + +/** + * Parse -s. + */ +static struct bfs_expr *parse_s(struct parser_state *state, int arg1, int arg2) { + state->ctx->flags |= BFTW_SORT; + return parse_nullary_flag(state); +} + +/** + * Parse -samefile FILE. + */ +static struct bfs_expr *parse_samefile(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_samefile); + if (!expr) { + return NULL; + } + + struct bfs_stat sb; + if (stat_arg(state, &expr->argv[1], &sb) != 0) { + bfs_expr_free(expr); + return NULL; + } + + expr->dev = sb.dev; + expr->ino = sb.ino; + + expr->cost = STAT_COST; + expr->probability = 0.01; + + return expr; +} + +/** + * Parse -S STRATEGY. + */ +static struct bfs_expr *parse_search_strategy(struct parser_state *state, int arg1, int arg2) { + struct bfs_ctx *ctx = state->ctx; + CFILE *cfile = ctx->cerr; + + const char *flag = state->argv[0]; + const char *arg = state->argv[1]; + if (!arg) { + parse_error(state, "${cyn}%s${rs} needs an argument.\n\n", flag); + goto list_strategies; + } + + parser_advance(state, T_FLAG, 1); + + if (strcmp(arg, "bfs") == 0) { + ctx->strategy = BFTW_BFS; + } else if (strcmp(arg, "dfs") == 0) { + ctx->strategy = BFTW_DFS; + } else if (strcmp(arg, "ids") == 0) { + ctx->strategy = BFTW_IDS; + } else if (strcmp(arg, "eds") == 0) { + ctx->strategy = BFTW_EDS; + } else if (strcmp(arg, "help") == 0) { + state->just_info = true; + cfile = ctx->cout; + goto list_strategies; + } else { + parse_error(state, "Unrecognized search strategy.\n\n"); + goto list_strategies; + } + + parser_advance(state, T_FLAG, 1); + return &bfs_true; + +list_strategies: + cfprintf(cfile, "Supported search strategies:\n\n"); + cfprintf(cfile, " ${bld}bfs${rs}: breadth-first search\n"); + cfprintf(cfile, " ${bld}dfs${rs}: depth-first search\n"); + cfprintf(cfile, " ${bld}ids${rs}: iterative deepening search\n"); + cfprintf(cfile, " ${bld}eds${rs}: exponential deepening search\n"); + return NULL; +} + +/** + * Parse -[aBcm]?since. + */ +static struct bfs_expr *parse_since(struct parser_state *state, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_newer); + if (!expr) { + return NULL; + } + + if (parse_reftime(state, expr) != 0) { + goto fail; + } + + expr->cost = STAT_COST; + expr->stat_field = field; + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -size N[cwbkMGTP]?. + */ +static struct bfs_expr *parse_size(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(state, eval_size); + if (!expr) { + return NULL; + } + + const char *unit = parse_icmp(state, expr, IF_PARTIAL_OK); + if (!unit) { + goto fail; + } + + if (strlen(unit) > 1) { + goto bad_unit; + } + + switch (*unit) { + case '\0': + case 'b': + expr->size_unit = BFS_BLOCKS; + break; + case 'c': + expr->size_unit = BFS_BYTES; + break; + case 'w': + expr->size_unit = BFS_WORDS; + break; + case 'k': + expr->size_unit = BFS_KB; + break; + case 'M': + expr->size_unit = BFS_MB; + break; + case 'G': + expr->size_unit = BFS_GB; + break; + case 'T': + expr->size_unit = BFS_TB; + break; + case 'P': + expr->size_unit = BFS_PB; + break; + + default: + goto bad_unit; + } + + expr->cost = STAT_COST; + expr->probability = expr->int_cmp == BFS_INT_EQUAL ? 0.01 : 0.50; + + return expr; + +bad_unit: + parse_expr_error(state, expr, "Expected a size unit (one of ${bld}cwbkMGTP${rs}); found ${err}%s${rs}.\n", unit); +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -sparse. + */ +static struct bfs_expr *parse_sparse(struct parser_state *state, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_test(state, eval_sparse); + if (expr) { + expr->cost = STAT_COST; + } + return expr; +} + +/** + * Parse -status. + */ +static struct bfs_expr *parse_status(struct parser_state *state, int arg1, int arg2) { + state->ctx->status = true; + return parse_nullary_option(state); +} + +/** + * Parse -x?type [bcdpflsD]. + */ +static struct bfs_expr *parse_type(struct parser_state *state, int x, int arg2) { + bfs_eval_fn *eval = x ? eval_xtype : eval_type; + struct bfs_expr *expr = parse_unary_test(state, eval); + if (!expr) { + return NULL; + } + + unsigned int types = 0; + float probability = 0.0; + + const char *c = expr->argv[1]; + while (true) { + enum bfs_type type; + float type_prob; + + switch (*c) { + case 'b': + type = BFS_BLK; + type_prob = 0.00000721183; + break; + case 'c': + type = BFS_CHR; + type_prob = 0.0000499855; + break; + case 'd': + type = BFS_DIR; + type_prob = 0.114475; + break; + case 'D': + type = BFS_DOOR; + type_prob = 0.000001; + break; + case 'p': + type = BFS_FIFO; + type_prob = 0.00000248684; + break; + case 'f': + type = BFS_REG; + type_prob = 0.859772; + break; + case 'l': + type = BFS_LNK; + type_prob = 0.0256816; + break; + case 's': + type = BFS_SOCK; + type_prob = 0.0000116881; + break; + case 'w': + type = BFS_WHT; + type_prob = 0.000001; + break; + + case '\0': + parse_expr_error(state, expr, "Expected a type flag.\n"); + goto fail; + + default: + parse_expr_error(state, expr, "Unknown type flag ${err}%c${rs}; expected one of [${bld}bcdpflsD${rs}].\n", *c); + goto fail; + } + + unsigned int flag = 1 << type; + if (!(types & flag)) { + types |= flag; + probability += type_prob; + } + + ++c; + if (*c == '\0') { + break; + } else if (*c == ',') { + ++c; + continue; + } else { + parse_expr_error(state, expr, "Types must be comma-separated.\n"); + goto fail; + } + } + + expr->num = types; + expr->probability = probability; + + if (x && state->ctx->optlevel < 4) { + // Since -xtype dereferences symbolic links, it may have side + // effects such as reporting permission errors, and thus + // shouldn't be re-ordered without aggressive optimizations + expr->pure = false; + } + + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +/** + * Parse -(no)?warn. + */ +static struct bfs_expr *parse_warn(struct parser_state *state, int warn, int arg2) { + state->ctx->warn = warn; + return parse_nullary_option(state); +} + +/** + * Parse -xattr. + */ +static struct bfs_expr *parse_xattr(struct parser_state *state, int arg1, int arg2) { +#if BFS_CAN_CHECK_XATTRS + struct bfs_expr *expr = parse_nullary_test(state, eval_xattr); + if (expr) { + expr->cost = STAT_COST; + expr->probability = 0.01; + } + return expr; +#else + parse_error(state, "Missing platform support.\n"); + return NULL; +#endif +} + +/** + * Parse -xattrname. + */ +static struct bfs_expr *parse_xattrname(struct parser_state *state, int arg1, int arg2) { +#if BFS_CAN_CHECK_XATTRS + struct bfs_expr *expr = parse_unary_test(state, eval_xattrname); + if (expr) { + expr->cost = STAT_COST; + expr->probability = 0.01; + } + return expr; +#else + parse_error(state, "Missing platform support.\n"); + return NULL; +#endif +} + +/** + * Parse -xdev. + */ +static struct bfs_expr *parse_xdev(struct parser_state *state, int arg1, int arg2) { + state->ctx->flags |= BFTW_PRUNE_MOUNTS; + state->xdev_arg = state->argv; + return parse_nullary_option(state); +} + +/** + * Launch a pager for the help output. + */ +static CFILE *launch_pager(pid_t *pid, CFILE *cout) { + char *pager = getenv("PAGER"); + + char *exe; + if (pager && pager[0]) { + exe = bfs_spawn_resolve(pager); + } else { + exe = bfs_spawn_resolve("less"); + if (!exe) { + exe = bfs_spawn_resolve("more"); + } + } + if (!exe) { + goto fail; + } + + int pipefd[2]; + if (pipe(pipefd) != 0) { + goto fail_exe; + } + + FILE *file = fdopen(pipefd[1], "w"); + if (!file) { + goto fail_pipe; + } + pipefd[1] = -1; + + CFILE *ret = cfwrap(file, NULL, true); + if (!ret) { + goto fail_file; + } + file = NULL; + + struct bfs_spawn ctx; + if (bfs_spawn_init(&ctx) != 0) { + goto fail_ret; + } + + if (bfs_spawn_addclose(&ctx, fileno(ret->file)) != 0) { + goto fail_ctx; + } + if (bfs_spawn_adddup2(&ctx, pipefd[0], STDIN_FILENO) != 0) { + goto fail_ctx; + } + if (bfs_spawn_addclose(&ctx, pipefd[0]) != 0) { + goto fail_ctx; + } + + char *argv[] = { + exe, + NULL, + NULL, + }; + + if (strcmp(xbasename(exe), "less") == 0) { + // We know less supports colors, other pagers may not + ret->colors = cout->colors; + argv[1] = "-FKRX"; + } + + *pid = bfs_spawn(exe, &ctx, argv, NULL); + if (*pid < 0) { + goto fail_ctx; + } + + xclose(pipefd[0]); + bfs_spawn_destroy(&ctx); + free(exe); + return ret; + +fail_ctx: + bfs_spawn_destroy(&ctx); +fail_ret: + cfclose(ret); +fail_file: + if (file) { + fclose(file); + } +fail_pipe: + if (pipefd[1] >= 0) { + xclose(pipefd[1]); + } + if (pipefd[0] >= 0) { + xclose(pipefd[0]); + } +fail_exe: + free(exe); +fail: + return cout; +} + +/** + * "Parse" -help. + */ +static struct bfs_expr *parse_help(struct parser_state *state, int arg1, int arg2) { + CFILE *cout = state->ctx->cout; + + pid_t pager = -1; + if (state->stdout_tty) { + cout = launch_pager(&pager, cout); + } + + cfprintf(cout, "Usage: ${ex}%s${rs} [${cyn}flags${rs}...] [${mag}paths${rs}...] [${blu}expression${rs}...]\n\n", + state->command); + + cfprintf(cout, "${ex}bfs${rs} is compatible with ${ex}find${rs}, with some extensions. " + "${cyn}Flags${rs} (${cyn}-H${rs}/${cyn}-L${rs}/${cyn}-P${rs} etc.), ${mag}paths${rs},\n" + "and ${blu}expressions${rs} may be freely mixed in any order.\n\n"); + + cfprintf(cout, "${bld}Flags:${rs}\n\n"); + + cfprintf(cout, " ${cyn}-H${rs}\n"); + cfprintf(cout, " Follow symbolic links on the command line, but not while searching\n"); + cfprintf(cout, " ${cyn}-L${rs}\n"); + cfprintf(cout, " Follow all symbolic links\n"); + cfprintf(cout, " ${cyn}-P${rs}\n"); + cfprintf(cout, " Never follow symbolic links (the default)\n"); + + cfprintf(cout, " ${cyn}-E${rs}\n"); + cfprintf(cout, " Use extended regular expressions (same as ${blu}-regextype${rs} ${bld}posix-extended${rs})\n"); + cfprintf(cout, " ${cyn}-X${rs}\n"); + cfprintf(cout, " Filter out files with non-${ex}xargs${rs}-safe names\n"); + cfprintf(cout, " ${cyn}-d${rs}\n"); + cfprintf(cout, " Search in post-order (same as ${blu}-depth${rs})\n"); + cfprintf(cout, " ${cyn}-s${rs}\n"); + cfprintf(cout, " Visit directory entries in sorted order\n"); + cfprintf(cout, " ${cyn}-x${rs}\n"); + cfprintf(cout, " Don't descend into other mount points (same as ${blu}-xdev${rs})\n"); + + cfprintf(cout, " ${cyn}-f${rs} ${mag}PATH${rs}\n"); + cfprintf(cout, " Treat ${mag}PATH${rs} as a path to search (useful if begins with a dash)\n"); + cfprintf(cout, " ${cyn}-D${rs} ${bld}FLAG${rs}\n"); + cfprintf(cout, " Turn on a debugging flag (see ${cyn}-D${rs} ${bld}help${rs})\n"); + cfprintf(cout, " ${cyn}-O${bld}N${rs}\n"); + cfprintf(cout, " Enable optimization level ${bld}N${rs} (default: ${bld}3${rs})\n"); + cfprintf(cout, " ${cyn}-S${rs} ${bld}bfs${rs}|${bld}dfs${rs}|${bld}ids${rs}|${bld}eds${rs}\n"); + cfprintf(cout, " Use ${bld}b${rs}readth-${bld}f${rs}irst/${bld}d${rs}epth-${bld}f${rs}irst/${bld}i${rs}terative/${bld}e${rs}xponential ${bld}d${rs}eepening ${bld}s${rs}earch\n"); + cfprintf(cout, " (default: ${cyn}-S${rs} ${bld}bfs${rs})\n\n"); + + cfprintf(cout, "${bld}Operators:${rs}\n\n"); + + cfprintf(cout, " ${red}(${rs} ${blu}expression${rs} ${red})${rs}\n\n"); + + cfprintf(cout, " ${red}!${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " ${red}-not${rs} ${blu}expression${rs}\n\n"); + + cfprintf(cout, " ${blu}expression${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " ${blu}expression${rs} ${red}-a${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " ${blu}expression${rs} ${red}-and${rs} ${blu}expression${rs}\n\n"); + + cfprintf(cout, " ${blu}expression${rs} ${red}-o${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " ${blu}expression${rs} ${red}-or${rs} ${blu}expression${rs}\n\n"); + + cfprintf(cout, " ${blu}expression${rs} ${red},${rs} ${blu}expression${rs}\n\n"); + + cfprintf(cout, "${bld}Special forms:${rs}\n\n"); + + cfprintf(cout, " ${red}-exclude${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " Exclude all paths matching the ${blu}expression${rs} from the search.\n\n"); + + cfprintf(cout, "${bld}Options:${rs}\n\n"); + + cfprintf(cout, " ${blu}-color${rs}\n"); + cfprintf(cout, " ${blu}-nocolor${rs}\n"); + cfprintf(cout, " Turn colors on or off (default: ${blu}-color${rs} if outputting to a terminal,\n"); + cfprintf(cout, " ${blu}-nocolor${rs} otherwise)\n"); + cfprintf(cout, " ${blu}-daystart${rs}\n"); + cfprintf(cout, " Measure times relative to the start of today\n"); + cfprintf(cout, " ${blu}-depth${rs}\n"); + cfprintf(cout, " Search in post-order (descendents first)\n"); + cfprintf(cout, " ${blu}-files0-from${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " Search the NUL ('\\0')-separated paths from ${bld}FILE${rs} (${bld}-${rs} for standard input).\n"); + cfprintf(cout, " ${blu}-follow${rs}\n"); + cfprintf(cout, " Follow all symbolic links (same as ${cyn}-L${rs})\n"); + cfprintf(cout, " ${blu}-ignore_readdir_race${rs}\n"); + cfprintf(cout, " ${blu}-noignore_readdir_race${rs}\n"); + cfprintf(cout, " Whether to report an error if ${ex}bfs${rs} detects that the file tree is modified\n"); + cfprintf(cout, " during the search (default: ${blu}-noignore_readdir_race${rs})\n"); + cfprintf(cout, " ${blu}-maxdepth${rs} ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-mindepth${rs} ${bld}N${rs}\n"); + cfprintf(cout, " Ignore files deeper/shallower than ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-mount${rs}\n"); + cfprintf(cout, " Don't descend into other mount points (same as ${blu}-xdev${rs} for now, but will\n"); + cfprintf(cout, " skip mount points entirely in the future)\n"); + cfprintf(cout, " ${blu}-nohidden${rs}\n"); + cfprintf(cout, " Exclude hidden files\n"); + cfprintf(cout, " ${blu}-noleaf${rs}\n"); + cfprintf(cout, " Ignored; for compatibility with GNU find\n"); + cfprintf(cout, " ${blu}-regextype${rs} ${bld}TYPE${rs}\n"); + cfprintf(cout, " Use ${bld}TYPE${rs}-flavored regexes (default: ${bld}posix-basic${rs}; see ${blu}-regextype${rs} ${bld}help${rs})\n"); + cfprintf(cout, " ${blu}-status${rs}\n"); + cfprintf(cout, " Display a status bar while searching\n"); + cfprintf(cout, " ${blu}-unique${rs}\n"); + cfprintf(cout, " Skip any files that have already been seen\n"); + cfprintf(cout, " ${blu}-warn${rs}\n"); + cfprintf(cout, " ${blu}-nowarn${rs}\n"); + cfprintf(cout, " Turn on or off warnings about the command line\n"); + cfprintf(cout, " ${blu}-xdev${rs}\n"); + cfprintf(cout, " Don't descend into other mount points\n\n"); + + cfprintf(cout, "${bld}Tests:${rs}\n\n"); + +#if BFS_CAN_CHECK_ACL + cfprintf(cout, " ${blu}-acl${rs}\n"); + cfprintf(cout, " Find files with a non-trivial Access Control List\n"); +#endif + cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}min${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified ${bld}N${rs} minutes ago\n"); + cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}newer${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified more recently than ${bld}FILE${rs} was\n" + " modified\n"); + cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}since${rs} ${bld}TIME${rs}\n"); + cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified more recently than ${bld}TIME${rs}\n"); + cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}time${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified ${bld}N${rs} days ago\n"); +#if BFS_CAN_CHECK_CAPABILITIES + cfprintf(cout, " ${blu}-capable${rs}\n"); + cfprintf(cout, " Find files with POSIX.1e capabilities set\n"); +#endif + cfprintf(cout, " ${blu}-depth${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files with depth ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-empty${rs}\n"); + cfprintf(cout, " Find empty files/directories\n"); + cfprintf(cout, " ${blu}-executable${rs}\n"); + cfprintf(cout, " ${blu}-readable${rs}\n"); + cfprintf(cout, " ${blu}-writable${rs}\n"); + cfprintf(cout, " Find files the current user can execute/read/write\n"); + cfprintf(cout, " ${blu}-false${rs}\n"); + cfprintf(cout, " ${blu}-true${rs}\n"); + cfprintf(cout, " Always false/true\n"); + cfprintf(cout, " ${blu}-fstype${rs} ${bld}TYPE${rs}\n"); + cfprintf(cout, " Find files on file systems with the given ${bld}TYPE${rs}\n"); + cfprintf(cout, " ${blu}-gid${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " ${blu}-uid${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files owned by group/user ID ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-group${rs} ${bld}NAME${rs}\n"); + cfprintf(cout, " ${blu}-user${rs} ${bld}NAME${rs}\n"); + cfprintf(cout, " Find files owned by the group/user ${bld}NAME${rs}\n"); + cfprintf(cout, " ${blu}-hidden${rs}\n"); + cfprintf(cout, " Find hidden files\n"); +#ifdef FNM_CASEFOLD + cfprintf(cout, " ${blu}-ilname${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-iname${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-ipath${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-iregex${rs} ${bld}REGEX${rs}\n"); + cfprintf(cout, " ${blu}-iwholename${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Case-insensitive versions of ${blu}-lname${rs}/${blu}-name${rs}/${blu}-path${rs}" + "/${blu}-regex${rs}/${blu}-wholename${rs}\n"); +#endif + cfprintf(cout, " ${blu}-inum${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files with inode number ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-links${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files with ${bld}N${rs} hard links\n"); + cfprintf(cout, " ${blu}-lname${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Find symbolic links whose target matches the ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-name${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Find files whose name matches the ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-newer${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " Find files newer than ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-newer${bld}XY${rs} ${bld}REFERENCE${rs}\n"); + cfprintf(cout, " Find files whose ${bld}X${rs} time is newer than the ${bld}Y${rs} time of" + " ${bld}REFERENCE${rs}. ${bld}X${rs} and ${bld}Y${rs}\n"); + cfprintf(cout, " can be any of [${bld}aBcm${rs}]. ${bld}Y${rs} may also be ${bld}t${rs} to parse ${bld}REFERENCE${rs} an explicit\n"); + cfprintf(cout, " timestamp.\n"); + cfprintf(cout, " ${blu}-nogroup${rs}\n"); + cfprintf(cout, " ${blu}-nouser${rs}\n"); + cfprintf(cout, " Find files owned by nonexistent groups/users\n"); + cfprintf(cout, " ${blu}-path${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-wholename${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Find files whose entire path matches the ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-perm${rs} ${bld}[-]MODE${rs}\n"); + cfprintf(cout, " Find files with a matching mode\n"); + cfprintf(cout, " ${blu}-regex${rs} ${bld}REGEX${rs}\n"); + cfprintf(cout, " Find files whose entire path matches the regular expression ${bld}REGEX${rs}\n"); + cfprintf(cout, " ${blu}-samefile${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " Find hard links to ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-since${rs} ${bld}TIME${rs}\n"); + cfprintf(cout, " Find files modified since ${bld}TIME${rs}\n"); + cfprintf(cout, " ${blu}-size${rs} ${bld}[-+]N[cwbkMGTP]${rs}\n"); + cfprintf(cout, " Find files with the given size, in 1-byte ${bld}c${rs}haracters, 2-byte ${bld}w${rs}ords,\n"); + cfprintf(cout, " 512-byte ${bld}b${rs}locks (default), or ${bld}k${rs}iB/${bld}M${rs}iB/${bld}G${rs}iB/${bld}T${rs}iB/${bld}P${rs}iB\n"); + cfprintf(cout, " ${blu}-sparse${rs}\n"); + cfprintf(cout, " Find files that occupy fewer disk blocks than expected\n"); + cfprintf(cout, " ${blu}-type${rs} ${bld}[bcdlpfswD]${rs}\n"); + cfprintf(cout, " Find files of the given type\n"); + cfprintf(cout, " ${blu}-used${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files last accessed ${bld}N${rs} days after they were changed\n"); +#if BFS_CAN_CHECK_XATTRS + cfprintf(cout, " ${blu}-xattr${rs}\n"); + cfprintf(cout, " Find files with extended attributes\n"); + cfprintf(cout, " ${blu}-xattrname${rs} ${bld}NAME${rs}\n"); + cfprintf(cout, " Find files with the extended attribute ${bld}NAME${rs}\n"); +#endif + cfprintf(cout, " ${blu}-xtype${rs} ${bld}[bcdlpfswD]${rs}\n"); + cfprintf(cout, " Find files of the given type, following links when ${blu}-type${rs} would not, and\n"); + cfprintf(cout, " vice versa\n\n"); + + cfprintf(cout, "${bld}Actions:${rs}\n\n"); + + cfprintf(cout, " ${blu}-delete${rs}\n"); + cfprintf(cout, " ${blu}-rm${rs}\n"); + cfprintf(cout, " Delete any found files (implies ${blu}-depth${rs})\n"); + cfprintf(cout, " ${blu}-exec${rs} ${bld}command ... {} ;${rs}\n"); + cfprintf(cout, " Execute a command\n"); + cfprintf(cout, " ${blu}-exec${rs} ${bld}command ... {} +${rs}\n"); + cfprintf(cout, " Execute a command with multiple files at once\n"); + cfprintf(cout, " ${blu}-ok${rs} ${bld}command ... {} ;${rs}\n"); + cfprintf(cout, " Prompt the user whether to execute a command\n"); + cfprintf(cout, " ${blu}-execdir${rs} ${bld}command ... {} ;${rs}\n"); + cfprintf(cout, " ${blu}-execdir${rs} ${bld}command ... {} +${rs}\n"); + cfprintf(cout, " ${blu}-okdir${rs} ${bld}command ... {} ;${rs}\n"); + cfprintf(cout, " Like ${blu}-exec${rs}/${blu}-ok${rs}, but run the command in the same directory as the found\n"); + cfprintf(cout, " file(s)\n"); + cfprintf(cout, " ${blu}-exit${rs} [${bld}STATUS${rs}]\n"); + cfprintf(cout, " Exit immediately with the given status (%d if unspecified)\n", EXIT_SUCCESS); + cfprintf(cout, " ${blu}-fls${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-fprint${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-fprint0${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-fprintf${rs} ${bld}FILE${rs} ${bld}FORMAT${rs}\n"); + cfprintf(cout, " Like ${blu}-ls${rs}/${blu}-print${rs}/${blu}-print0${rs}/${blu}-printf${rs}, but write to ${bld}FILE${rs} instead of standard\n" + " output\n"); + cfprintf(cout, " ${blu}-ls${rs}\n"); + cfprintf(cout, " List files like ${ex}ls${rs} ${bld}-dils${rs}\n"); + cfprintf(cout, " ${blu}-print${rs}\n"); + cfprintf(cout, " Print the path to the found file\n"); + cfprintf(cout, " ${blu}-print0${rs}\n"); + cfprintf(cout, " Like ${blu}-print${rs}, but use the null character ('\\0') as a separator rather than\n"); + cfprintf(cout, " newlines\n"); + cfprintf(cout, " ${blu}-printf${rs} ${bld}FORMAT${rs}\n"); + cfprintf(cout, " Print according to a format string (see ${ex}man${rs} ${bld}find${rs}). The additional format\n"); + cfprintf(cout, " directives %%w and %%W${bld}k${rs} for printing file birth times are supported.\n"); + cfprintf(cout, " ${blu}-printx${rs}\n"); + cfprintf(cout, " Like ${blu}-print${rs}, but escape whitespace and quotation characters, to make the\n"); + cfprintf(cout, " output safe for ${ex}xargs${rs}. Consider using ${blu}-print0${rs} and ${ex}xargs${rs} ${bld}-0${rs} instead.\n"); + cfprintf(cout, " ${blu}-prune${rs}\n"); + cfprintf(cout, " Don't descend into this directory\n"); + cfprintf(cout, " ${blu}-quit${rs}\n"); + cfprintf(cout, " Quit immediately\n"); + cfprintf(cout, " ${blu}-version${rs}\n"); + cfprintf(cout, " Print version information\n"); + cfprintf(cout, " ${blu}-help${rs}\n"); + cfprintf(cout, " Print this help message\n\n"); + + cfprintf(cout, "%s\n", BFS_HOMEPAGE); + + if (pager > 0) { + cfclose(cout); + waitpid(pager, NULL, 0); + } + + state->just_info = true; + return NULL; +} + +/** + * "Parse" -version. + */ +static struct bfs_expr *parse_version(struct parser_state *state, int arg1, int arg2) { + cfprintf(state->ctx->cout, "${ex}bfs${rs} ${bld}%s${rs}\n\n", BFS_VERSION); + + printf("%s\n", BFS_HOMEPAGE); + + state->just_info = true; + return NULL; +} + +typedef struct bfs_expr *parse_fn(struct parser_state *state, int arg1, int arg2); + +/** + * An entry in the parse table for literals. + */ +struct table_entry { + char *arg; + enum token_type type; + parse_fn *parse; + int arg1; + int arg2; + bool prefix; +}; + +/** + * The parse table for literals. + */ +static const struct table_entry parse_table[] = { + {"--", T_FLAG}, + {"--help", T_ACTION, parse_help}, + {"--version", T_ACTION, parse_version}, + {"-Bmin", T_TEST, parse_min, BFS_STAT_BTIME}, + {"-Bnewer", T_TEST, parse_newer, BFS_STAT_BTIME}, + {"-Bsince", T_TEST, parse_since, BFS_STAT_BTIME}, + {"-Btime", T_TEST, parse_time, BFS_STAT_BTIME}, + {"-D", T_FLAG, parse_debug}, + {"-E", T_FLAG, parse_regex_extended}, + {"-H", T_FLAG, parse_follow, BFTW_FOLLOW_ROOTS, false}, + {"-L", T_FLAG, parse_follow, BFTW_FOLLOW_ALL, false}, + {"-O", T_FLAG, parse_optlevel, 0, 0, true}, + {"-P", T_FLAG, parse_follow, 0, false}, + {"-S", T_FLAG, parse_search_strategy}, + {"-X", T_FLAG, parse_xargs_safe}, + {"-a", T_OPERATOR}, + {"-acl", T_TEST, parse_acl}, + {"-amin", T_TEST, parse_min, BFS_STAT_ATIME}, + {"-and", T_OPERATOR}, + {"-anewer", T_TEST, parse_newer, BFS_STAT_ATIME}, + {"-asince", T_TEST, parse_since, BFS_STAT_ATIME}, + {"-atime", T_TEST, parse_time, BFS_STAT_ATIME}, + {"-capable", T_TEST, parse_capable}, + {"-cmin", T_TEST, parse_min, BFS_STAT_CTIME}, + {"-cnewer", T_TEST, parse_newer, BFS_STAT_CTIME}, + {"-color", T_OPTION, parse_color, true}, + {"-csince", T_TEST, parse_since, BFS_STAT_CTIME}, + {"-ctime", T_TEST, parse_time, BFS_STAT_CTIME}, + {"-d", T_FLAG, parse_depth}, + {"-daystart", T_OPTION, parse_daystart}, + {"-delete", T_ACTION, parse_delete}, + {"-depth", T_OPTION, parse_depth_n}, + {"-empty", T_TEST, parse_empty}, + {"-exclude", T_OPERATOR}, + {"-exec", T_ACTION, parse_exec, 0}, + {"-execdir", T_ACTION, parse_exec, BFS_EXEC_CHDIR}, + {"-executable", T_TEST, parse_access, X_OK}, + {"-exit", T_ACTION, parse_exit}, + {"-f", T_FLAG, parse_f}, + {"-false", T_TEST, parse_const, false}, + {"-files0-from", T_OPTION, parse_files0_from}, + {"-flags", T_TEST, parse_flags}, + {"-fls", T_ACTION, parse_fls}, + {"-follow", T_OPTION, parse_follow, BFTW_FOLLOW_ALL, true}, + {"-fprint", T_ACTION, parse_fprint}, + {"-fprint0", T_ACTION, parse_fprint0}, + {"-fprintf", T_ACTION, parse_fprintf}, + {"-fstype", T_TEST, parse_fstype}, + {"-gid", T_TEST, parse_group}, + {"-group", T_TEST, parse_group}, + {"-help", T_ACTION, parse_help}, + {"-hidden", T_TEST, parse_hidden}, + {"-ignore_readdir_race", T_OPTION, parse_ignore_races, true}, + {"-ilname", T_TEST, parse_lname, true}, + {"-iname", T_TEST, parse_name, true}, + {"-inum", T_TEST, parse_inum}, + {"-ipath", T_TEST, parse_path, true}, + {"-iregex", T_TEST, parse_regex, BFS_REGEX_ICASE}, + {"-iwholename", T_TEST, parse_path, true}, + {"-links", T_TEST, parse_links}, + {"-lname", T_TEST, parse_lname, false}, + {"-ls", T_ACTION, parse_ls}, + {"-maxdepth", T_OPTION, parse_depth_limit, false}, + {"-mindepth", T_OPTION, parse_depth_limit, true}, + {"-mmin", T_TEST, parse_min, BFS_STAT_MTIME}, + {"-mnewer", T_TEST, parse_newer, BFS_STAT_MTIME}, + {"-mount", T_OPTION, parse_mount}, + {"-msince", T_TEST, parse_since, BFS_STAT_MTIME}, + {"-mtime", T_TEST, parse_time, BFS_STAT_MTIME}, + {"-name", T_TEST, parse_name, false}, + {"-newer", T_TEST, parse_newer, BFS_STAT_MTIME}, + {"-newer", T_TEST, parse_newerxy, 0, 0, true}, + {"-nocolor", T_OPTION, parse_color, false}, + {"-nogroup", T_TEST, parse_nogroup}, + {"-nohidden", T_TEST, parse_nohidden}, + {"-noignore_readdir_race", T_OPTION, parse_ignore_races, false}, + {"-noleaf", T_OPTION, parse_noleaf}, + {"-not", T_OPERATOR}, + {"-nouser", T_TEST, parse_nouser}, + {"-nowarn", T_OPTION, parse_warn, false}, + {"-o", T_OPERATOR}, + {"-ok", T_ACTION, parse_exec, BFS_EXEC_CONFIRM}, + {"-okdir", T_ACTION, parse_exec, BFS_EXEC_CONFIRM | BFS_EXEC_CHDIR}, + {"-or", T_OPERATOR}, + {"-path", T_TEST, parse_path, false}, + {"-perm", T_TEST, parse_perm}, + {"-print", T_ACTION, parse_print}, + {"-print0", T_ACTION, parse_print0}, + {"-printf", T_ACTION, parse_printf}, + {"-printx", T_ACTION, parse_printx}, + {"-prune", T_ACTION, parse_prune}, + {"-quit", T_ACTION, parse_quit}, + {"-readable", T_TEST, parse_access, R_OK}, + {"-regex", T_TEST, parse_regex, 0}, + {"-regextype", T_OPTION, parse_regextype}, + {"-rm", T_ACTION, parse_delete}, + {"-s", T_FLAG, parse_s}, + {"-samefile", T_TEST, parse_samefile}, + {"-since", T_TEST, parse_since, BFS_STAT_MTIME}, + {"-size", T_TEST, parse_size}, + {"-sparse", T_TEST, parse_sparse}, + {"-status", T_OPTION, parse_status}, + {"-true", T_TEST, parse_const, true}, + {"-type", T_TEST, parse_type, false}, + {"-uid", T_TEST, parse_user}, + {"-unique", T_OPTION, parse_unique}, + {"-used", T_TEST, parse_used}, + {"-user", T_TEST, parse_user}, + {"-version", T_ACTION, parse_version}, + {"-warn", T_OPTION, parse_warn, true}, + {"-wholename", T_TEST, parse_path, false}, + {"-writable", T_TEST, parse_access, W_OK}, + {"-x", T_FLAG, parse_xdev}, + {"-xattr", T_TEST, parse_xattr}, + {"-xattrname", T_TEST, parse_xattrname}, + {"-xdev", T_OPTION, parse_xdev}, + {"-xtype", T_TEST, parse_type, true}, + {0}, +}; + +/** Look up an argument in the parse table. */ +static const struct table_entry *table_lookup(const char *arg) { + for (const struct table_entry *entry = parse_table; entry->arg; ++entry) { + bool match; + if (entry->prefix) { + match = strncmp(arg, entry->arg, strlen(entry->arg)) == 0; + } else { + match = strcmp(arg, entry->arg) == 0; + } + if (match) { + return entry; + } + } + + return NULL; +} + +/** Search for a fuzzy match in the parse table. */ +static const struct table_entry *table_lookup_fuzzy(const char *arg) { + const struct table_entry *best = NULL; + int best_dist; + + for (const struct table_entry *entry = parse_table; entry->arg; ++entry) { + int dist = typo_distance(arg, entry->arg); + if (!best || dist < best_dist) { + best = entry; + best_dist = dist; + } + } + + return best; +} + +/** + * LITERAL : OPTION + * | TEST + * | ACTION + */ +static struct bfs_expr *parse_literal(struct parser_state *state) { + // Paths are already skipped at this point + const char *arg = state->argv[0]; + + if (arg[0] != '-') { + goto unexpected; + } + + const struct table_entry *match = table_lookup(arg); + if (match) { + if (match->parse) { + goto matched; + } else { + goto unexpected; + } + } + + match = table_lookup_fuzzy(arg); + + CFILE *cerr = state->ctx->cerr; + parse_error(state, "Unknown argument; did you mean "); + switch (match->type) { + case T_FLAG: + cfprintf(cerr, "${cyn}%s${rs}?", match->arg); + break; + case T_OPERATOR: + cfprintf(cerr, "${red}%s${rs}?", match->arg); + break; + default: + cfprintf(cerr, "${blu}%s${rs}?", match->arg); + break; + } + + if (!state->interactive || !match->parse) { + fprintf(stderr, "\n"); + goto unmatched; + } + + fprintf(stderr, " "); + if (ynprompt() <= 0) { + goto unmatched; + } + + fprintf(stderr, "\n"); + state->argv[0] = match->arg; + +matched: + return match->parse(state, match->arg1, match->arg2); + +unmatched: + return NULL; + +unexpected: + parse_error(state, "Expected a predicate.\n"); + return NULL; +} + +/** + * FACTOR : "(" EXPR ")" + * | "!" FACTOR | "-not" FACTOR + * | "-exclude" FACTOR + * | LITERAL + */ +static struct bfs_expr *parse_factor(struct parser_state *state) { + if (skip_paths(state) != 0) { + return NULL; + } + + const char *arg = state->argv[0]; + if (!arg) { + parse_argv_error(state, state->last_arg, 1, "Expression terminated prematurely here.\n"); + return NULL; + } + + if (strcmp(arg, "(") == 0) { + parser_advance(state, T_OPERATOR, 1); + + struct bfs_expr *expr = parse_expr(state); + if (!expr) { + return NULL; + } + + if (skip_paths(state) != 0) { + bfs_expr_free(expr); + return NULL; + } + + arg = state->argv[0]; + if (!arg || strcmp(arg, ")") != 0) { + parse_argv_error(state, state->last_arg, 1, "Expected a ${red})${rs}.\n"); + bfs_expr_free(expr); + return NULL; + } + + parser_advance(state, T_OPERATOR, 1); + return expr; + } else if (strcmp(arg, "-exclude") == 0) { + if (state->excluding) { + parse_error(state, "${err}%s${rs} is not supported within ${red}-exclude${rs}.\n", arg); + return NULL; + } + + parser_advance(state, T_OPERATOR, 1); + state->excluding = true; + + struct bfs_expr *factor = parse_factor(state); + if (!factor) { + return NULL; + } + + state->excluding = false; + + if (parse_exclude(state, factor) != 0) { + return NULL; + } + + return &bfs_true; + } else if (strcmp(arg, "!") == 0 || strcmp(arg, "-not") == 0) { + char **argv = parser_advance(state, T_OPERATOR, 1); + + struct bfs_expr *factor = parse_factor(state); + if (!factor) { + return NULL; + } + + return new_unary_expr(eval_not, factor, argv); + } else { + return parse_literal(state); + } +} + +/** + * TERM : FACTOR + * | TERM FACTOR + * | TERM "-a" FACTOR + * | TERM "-and" FACTOR + */ +static struct bfs_expr *parse_term(struct parser_state *state) { + struct bfs_expr *term = parse_factor(state); + + while (term) { + if (skip_paths(state) != 0) { + bfs_expr_free(term); + return NULL; + } + + const char *arg = state->argv[0]; + if (!arg) { + break; + } + + if (strcmp(arg, "-o") == 0 || strcmp(arg, "-or") == 0 + || strcmp(arg, ",") == 0 + || strcmp(arg, ")") == 0) { + break; + } + + char **argv = &fake_and_arg; + if (strcmp(arg, "-a") == 0 || strcmp(arg, "-and") == 0) { + argv = parser_advance(state, T_OPERATOR, 1); + } + + struct bfs_expr *lhs = term; + struct bfs_expr *rhs = parse_factor(state); + if (!rhs) { + bfs_expr_free(lhs); + return NULL; + } + + term = new_binary_expr(eval_and, lhs, rhs, argv); + } + + return term; +} + +/** + * CLAUSE : TERM + * | CLAUSE "-o" TERM + * | CLAUSE "-or" TERM + */ +static struct bfs_expr *parse_clause(struct parser_state *state) { + struct bfs_expr *clause = parse_term(state); + + while (clause) { + if (skip_paths(state) != 0) { + bfs_expr_free(clause); + return NULL; + } + + const char *arg = state->argv[0]; + if (!arg) { + break; + } + + if (strcmp(arg, "-o") != 0 && strcmp(arg, "-or") != 0) { + break; + } + + char **argv = parser_advance(state, T_OPERATOR, 1); + + struct bfs_expr *lhs = clause; + struct bfs_expr *rhs = parse_term(state); + if (!rhs) { + bfs_expr_free(lhs); + return NULL; + } + + clause = new_binary_expr(eval_or, lhs, rhs, argv); + } + + return clause; +} + +/** + * EXPR : CLAUSE + * | EXPR "," CLAUSE + */ +static struct bfs_expr *parse_expr(struct parser_state *state) { + struct bfs_expr *expr = parse_clause(state); + + while (expr) { + if (skip_paths(state) != 0) { + bfs_expr_free(expr); + return NULL; + } + + const char *arg = state->argv[0]; + if (!arg) { + break; + } + + if (strcmp(arg, ",") != 0) { + break; + } + + char **argv = parser_advance(state, T_OPERATOR, 1); + + struct bfs_expr *lhs = expr; + struct bfs_expr *rhs = parse_clause(state); + if (!rhs) { + bfs_expr_free(lhs); + return NULL; + } + + expr = new_binary_expr(eval_comma, lhs, rhs, argv); + } + + return expr; +} + +/** + * Parse the top-level expression. + */ +static struct bfs_expr *parse_whole_expr(struct parser_state *state) { + if (skip_paths(state) != 0) { + return NULL; + } + + struct bfs_expr *expr = &bfs_true; + if (state->argv[0]) { + expr = parse_expr(state); + if (!expr) { + return NULL; + } + } + + if (state->argv[0]) { + parse_error(state, "Unexpected argument.\n"); + goto fail; + } + + if (state->implicit_print) { + struct bfs_expr *print = bfs_expr_new(eval_fprint, 1, &fake_print_arg); + if (!print) { + goto fail; + } + init_print_expr(state, print); + print->synthetic = true; + + expr = new_binary_expr(eval_and, expr, print, &fake_and_arg); + if (!expr) { + goto fail; + } + } + + if (state->mount_arg && state->xdev_arg) { + parse_conflict_warning(state, state->mount_arg, 1, state->xdev_arg, 1, + "${blu}%s${rs} is redundant in the presence of ${blu}%s${rs}.\n\n", + state->xdev_arg[0], state->mount_arg[0]); + } + + if (state->ctx->warn && state->depth_arg && state->prune_arg) { + parse_conflict_warning(state, state->depth_arg, 1, state->prune_arg, 1, + "${blu}%s${rs} does not work in the presence of ${blu}%s${rs}.\n", + state->prune_arg[0], state->depth_arg[0]); + + if (state->interactive) { + bfs_warning(state->ctx, "Do you want to continue? "); + if (ynprompt() == 0) { + goto fail; + } + } + + fprintf(stderr, "\n"); + } + + if (state->ok_expr && state->files0_stdin_arg) { + parse_conflict_error(state, state->ok_expr->argv, state->ok_expr->argc, state->files0_stdin_arg, 2, + "${blu}%s${rs} conflicts with ${blu}%s${rs} ${bld}%s${rs}.\n", + state->ok_expr->argv[0], state->files0_stdin_arg[0], state->files0_stdin_arg[1]); + goto fail; + } + + return expr; + +fail: + bfs_expr_free(expr); + return NULL; +} + +void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag) { + if (!bfs_debug_prefix(ctx, flag)) { + return; + } + + CFILE *cerr = ctx->cerr; + + cfprintf(cerr, "${ex}%s${rs} ", ctx->argv[0]); + + if (ctx->flags & BFTW_FOLLOW_ALL) { + cfprintf(cerr, "${cyn}-L${rs} "); + } else if (ctx->flags & BFTW_FOLLOW_ROOTS) { + cfprintf(cerr, "${cyn}-H${rs} "); + } else { + cfprintf(cerr, "${cyn}-P${rs} "); + } + + if (ctx->xargs_safe) { + cfprintf(cerr, "${cyn}-X${rs} "); + } + + if (ctx->flags & BFTW_SORT) { + cfprintf(cerr, "${cyn}-s${rs} "); + } + + if (ctx->optlevel != 3) { + cfprintf(cerr, "${cyn}-O${bld}%d${rs} ", ctx->optlevel); + } + + const char *strategy = NULL; + switch (ctx->strategy) { + case BFTW_BFS: + strategy = "bfs"; + break; + case BFTW_DFS: + strategy = "dfs"; + break; + case BFTW_IDS: + strategy = "ids"; + break; + case BFTW_EDS: + strategy = "eds"; + break; + } + assert(strategy); + cfprintf(cerr, "${cyn}-S${rs} ${bld}%s${rs} ", strategy); + + enum debug_flags debug = ctx->debug; + if (debug == DEBUG_ALL) { + cfprintf(cerr, "${cyn}-D${rs} ${bld}all${rs} "); + } else if (debug) { + cfprintf(cerr, "${cyn}-D${rs} "); + for (enum debug_flags i = 1; DEBUG_ALL & i; i <<= 1) { + if (debug & i) { + cfprintf(cerr, "${bld}%s${rs}", debug_flag_name(i)); + debug ^= i; + if (debug) { + cfprintf(cerr, ","); + } + } + } + cfprintf(cerr, " "); + } + + for (size_t i = 0; i < darray_length(ctx->paths); ++i) { + const char *path = ctx->paths[i]; + char c = path[0]; + if (c == '-' || c == '(' || c == ')' || c == '!' || c == ',') { + cfprintf(cerr, "${cyn}-f${rs} "); + } + cfprintf(cerr, "${mag}%s${rs} ", path); + } + + if (ctx->cout->colors) { + cfprintf(cerr, "${blu}-color${rs} "); + } else { + cfprintf(cerr, "${blu}-nocolor${rs} "); + } + if (ctx->flags & BFTW_POST_ORDER) { + cfprintf(cerr, "${blu}-depth${rs} "); + } + if (ctx->ignore_races) { + cfprintf(cerr, "${blu}-ignore_readdir_race${rs} "); + } + if (ctx->mindepth != 0) { + cfprintf(cerr, "${blu}-mindepth${rs} ${bld}%d${rs} ", ctx->mindepth); + } + if (ctx->maxdepth != INT_MAX) { + cfprintf(cerr, "${blu}-maxdepth${rs} ${bld}%d${rs} ", ctx->maxdepth); + } + if (ctx->flags & BFTW_SKIP_MOUNTS) { + cfprintf(cerr, "${blu}-mount${rs} "); + } + if (ctx->status) { + cfprintf(cerr, "${blu}-status${rs} "); + } + if (ctx->unique) { + cfprintf(cerr, "${blu}-unique${rs} "); + } + if ((ctx->flags & (BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS)) == BFTW_PRUNE_MOUNTS) { + cfprintf(cerr, "${blu}-xdev${rs} "); + } + + if (flag == DEBUG_RATES) { + if (ctx->exclude != &bfs_false) { + cfprintf(cerr, "(${red}-exclude${rs} %pE) ", ctx->exclude); + } + cfprintf(cerr, "%pE", ctx->expr); + } else { + if (ctx->exclude != &bfs_false) { + cfprintf(cerr, "(${red}-exclude${rs} %pe) ", ctx->exclude); + } + cfprintf(cerr, "%pe", ctx->expr); + } + + fputs("\n", stderr); +} + +/** + * Dump the estimated costs. + */ +static void dump_costs(const struct bfs_ctx *ctx) { + const struct bfs_expr *expr = ctx->expr; + bfs_debug(ctx, DEBUG_COST, " Cost: ~${ylw}%g${rs}\n", expr->cost); + bfs_debug(ctx, DEBUG_COST, "Probability: ~${ylw}%g%%${rs}\n", 100.0*expr->probability); +} + +/** + * Get the current time. + */ +static int parse_gettime(const struct bfs_ctx *ctx, struct timespec *ts) { +#if _POSIX_TIMERS > 0 + int ret = clock_gettime(CLOCK_REALTIME, ts); + if (ret != 0) { + bfs_perror(ctx, "clock_gettime()"); + } + return ret; +#else + struct timeval tv; + int ret = gettimeofday(&tv, NULL); + if (ret == 0) { + ts->tv_sec = tv.tv_sec; + ts->tv_nsec = tv.tv_usec * 1000L; + } else { + bfs_perror(ctx, "gettimeofday()"); + } + return ret; +#endif +} + +struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) { + struct bfs_ctx *ctx = bfs_ctx_new(); + if (!ctx) { + perror("bfs_new_ctx()"); + goto fail; + } + + static char* default_argv[] = {"bfs", NULL}; + if (argc < 1) { + argc = 1; + argv = default_argv; + } + + ctx->argc = argc; + ctx->argv = malloc((argc + 1)*sizeof(*ctx->argv)); + if (!ctx->argv) { + perror("malloc()"); + goto fail; + } + for (int i = 0; i <= argc; ++i) { + ctx->argv[i] = argv[i]; + } + + enum use_color use_color = COLOR_AUTO; + if (getenv("NO_COLOR")) { + // https://no-color.org/ + use_color = COLOR_NEVER; + } + + ctx->colors = parse_colors(); + if (!ctx->colors) { + ctx->colors_error = errno; + } + + ctx->cerr = cfwrap(stderr, use_color ? ctx->colors : NULL, false); + if (!ctx->cerr) { + perror("cfwrap()"); + goto fail; + } + + ctx->cout = cfwrap(stdout, use_color ? ctx->colors : NULL, false); + if (!ctx->cout) { + bfs_perror(ctx, "cfwrap()"); + goto fail; + } + + if (!bfs_ctx_dedup(ctx, ctx->cout, NULL) || !bfs_ctx_dedup(ctx, ctx->cerr, NULL)) { + bfs_perror(ctx, "bfs_ctx_dedup()"); + goto fail; + } + + bool stdin_tty = isatty(STDIN_FILENO); + bool stdout_tty = isatty(STDOUT_FILENO); + bool stderr_tty = isatty(STDERR_FILENO); + + if (getenv("POSIXLY_CORRECT")) { + ctx->posixly_correct = true; + } else { + ctx->warn = stdin_tty; + } + + struct parser_state state = { + .ctx = ctx, + .argv = ctx->argv + 1, + .command = ctx->argv[0], + .regex_type = BFS_REGEX_POSIX_BASIC, + .stdout_tty = stdout_tty, + .interactive = stdin_tty && stderr_tty, + .use_color = use_color, + .implicit_print = true, + .implicit_root = true, + .just_info = false, + .excluding = false, + .last_arg = NULL, + .depth_arg = NULL, + .prune_arg = NULL, + .mount_arg = NULL, + .xdev_arg = NULL, + .files0_arg = NULL, + .files0_stdin_arg = NULL, + .ok_expr = NULL, + }; + + if (strcmp(xbasename(state.command), "find") == 0) { + // Operate depth-first when invoked as "find" + ctx->strategy = BFTW_DFS; + } + + if (parse_gettime(ctx, &state.now) != 0) { + goto fail; + } + + ctx->exclude = &bfs_false; + ctx->expr = parse_whole_expr(&state); + if (!ctx->expr) { + if (state.just_info) { + goto done; + } else { + goto fail; + } + } + + if (bfs_optimize(ctx) != 0) { + goto fail; + } + + if (darray_length(ctx->paths) == 0) { + if (!state.implicit_root) { + parse_argv_error(&state, state.files0_arg, 2, "No root paths specified.\n"); + goto fail; + } else if (parse_root(&state, ".") != 0) { + goto fail; + } + } + + if ((ctx->flags & BFTW_FOLLOW_ALL) && !ctx->unique) { + // We need bftw() to detect cycles unless -unique does it for us + ctx->flags |= BFTW_DETECT_CYCLES; + } + + bfs_ctx_dump(ctx, DEBUG_TREE); + dump_costs(ctx); + +done: + return ctx; + +fail: + bfs_ctx_free(ctx); + return NULL; +} diff --git a/src/parse.h b/src/parse.h new file mode 100644 index 0000000..7e29a03 --- /dev/null +++ b/src/parse.h @@ -0,0 +1,36 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * bfs command line parsing. + */ + +#ifndef BFS_PARSE_H +#define BFS_PARSE_H + +/** + * Parse the command line. + * + * @param argc + * The number of arguments. + * @param argv + * The arguments to parse. + * @return + * A new bfs context, or NULL on failure. + */ +struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]); + +#endif // BFS_PARSE_H diff --git a/src/printf.c b/src/printf.c new file mode 100644 index 0000000..8fdde41 --- /dev/null +++ b/src/printf.c @@ -0,0 +1,927 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2017-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "printf.h" +#include "bftw.h" +#include "color.h" +#include "ctx.h" +#include "darray.h" +#include "diag.h" +#include "dir.h" +#include "dstring.h" +#include "expr.h" +#include "mtab.h" +#include "pwcache.h" +#include "stat.h" +#include "util.h" +#include "xtime.h" +#include <assert.h> +#include <errno.h> +#include <grp.h> +#include <pwd.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +/** + * A function implementing a printf directive. + */ +typedef int bfs_printf_fn(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf); + +/** + * A single printf directive like %f or %#4m. The whole format string is stored + * as a darray of these. + */ +struct bfs_printf { + /** The printing function to invoke. */ + bfs_printf_fn *fn; + /** String data associated with this directive. */ + char *str; + /** The stat field to print. */ + enum bfs_stat_field stat_field; + /** Character data associated with this directive. */ + char c; + /** Some data used by the directive. */ + const void *ptr; +}; + +/** Print some text as-is. */ +static int bfs_printf_literal(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + size_t len = dstrlen(directive->str); + if (fwrite(directive->str, 1, len, cfile->file) == len) { + return 0; + } else { + return -1; + } +} + +/** \c: flush */ +static int bfs_printf_flush(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + return fflush(cfile->file); +} + +/** Check if we can safely colorize this directive. */ +static bool should_color(CFILE *cfile, const struct bfs_printf *directive) { + return cfile->colors && strcmp(directive->str, "%s") == 0; +} + +/** + * Print a value to a temporary buffer before formatting it. + */ +#define BFS_PRINTF_BUF(buf, format, ...) \ + char buf[256]; \ + int ret = snprintf(buf, sizeof(buf), format, __VA_ARGS__); \ + assert(ret >= 0 && (size_t)ret < sizeof(buf)); \ + (void)ret + +/** %a, %c, %t: ctime() */ +static int bfs_printf_ctime(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + // Not using ctime() itself because GNU find adds nanoseconds + static const char *days[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; + static const char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + const struct timespec *ts = bfs_stat_time(statbuf, directive->stat_field); + if (!ts) { + return -1; + } + + struct tm tm; + if (xlocaltime(&ts->tv_sec, &tm) != 0) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%s %s %2d %.2d:%.2d:%.2d.%09ld0 %4d", + days[tm.tm_wday], + months[tm.tm_mon], + tm.tm_mday, + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)ts->tv_nsec, + 1900 + tm.tm_year); + + return fprintf(cfile->file, directive->str, buf); +} + +/** %A, %B/%W, %C, %T: strftime() */ +static int bfs_printf_strftime(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + const struct timespec *ts = bfs_stat_time(statbuf, directive->stat_field); + if (!ts) { + return -1; + } + + struct tm tm; + if (xlocaltime(&ts->tv_sec, &tm) != 0) { + return -1; + } + + int ret; + char buf[256]; + char format[] = "% "; + switch (directive->c) { + // Non-POSIX strftime() features + case '@': + ret = snprintf(buf, sizeof(buf), "%lld.%09ld0", (long long)ts->tv_sec, (long)ts->tv_nsec); + break; + case '+': + ret = snprintf(buf, sizeof(buf), "%4d-%.2d-%.2d+%.2d:%.2d:%.2d.%09ld0", + 1900 + tm.tm_year, + tm.tm_mon + 1, + tm.tm_mday, + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)ts->tv_nsec); + break; + case 'k': + ret = snprintf(buf, sizeof(buf), "%2d", tm.tm_hour); + break; + case 'l': + ret = snprintf(buf, sizeof(buf), "%2d", (tm.tm_hour + 11)%12 + 1); + break; + case 's': + ret = snprintf(buf, sizeof(buf), "%lld", (long long)ts->tv_sec); + break; + case 'S': + ret = snprintf(buf, sizeof(buf), "%.2d.%09ld0", tm.tm_sec, (long)ts->tv_nsec); + break; + case 'T': + ret = snprintf(buf, sizeof(buf), "%.2d:%.2d:%.2d.%09ld0", + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)ts->tv_nsec); + break; + + // POSIX strftime() features + default: + format[1] = directive->c; + ret = strftime(buf, sizeof(buf), format, &tm); + break; + } + + assert(ret >= 0 && (size_t)ret < sizeof(buf)); + (void)ret; + + return fprintf(cfile->file, directive->str, buf); +} + +/** %b: blocks */ +static int bfs_printf_b(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + uintmax_t blocks = ((uintmax_t)statbuf->blocks*BFS_STAT_BLKSIZE + 511)/512; + BFS_PRINTF_BUF(buf, "%ju", blocks); + return fprintf(cfile->file, directive->str, buf); +} + +/** %d: depth */ +static int bfs_printf_d(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + return fprintf(cfile->file, directive->str, (intmax_t)ftwbuf->depth); +} + +/** %D: device */ +static int bfs_printf_D(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->dev); + return fprintf(cfile->file, directive->str, buf); +} + +/** %f: file name */ +static int bfs_printf_f(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + if (should_color(cfile, directive)) { + return cfprintf(cfile, "%pF", ftwbuf); + } else { + return fprintf(cfile->file, directive->str, ftwbuf->path + ftwbuf->nameoff); + } +} + +/** %F: file system type */ +static int bfs_printf_F(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + const char *type = bfs_fstype(directive->ptr, statbuf); + return fprintf(cfile->file, directive->str, type); +} + +/** %G: gid */ +static int bfs_printf_G(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->gid); + return fprintf(cfile->file, directive->str, buf); +} + +/** %g: group name */ +static int bfs_printf_g(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + const struct bfs_groups *groups = directive->ptr; + const struct group *grp = groups ? bfs_getgrgid(groups, statbuf->gid) : NULL; + if (!grp) { + return bfs_printf_G(cfile, directive, ftwbuf); + } + + return fprintf(cfile->file, directive->str, grp->gr_name); +} + +/** %h: leading directories */ +static int bfs_printf_h(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + char *copy = NULL; + const char *buf; + + if (ftwbuf->nameoff > 0) { + size_t len = ftwbuf->nameoff; + if (len > 1) { + --len; + } + + buf = copy = strndup(ftwbuf->path, len); + } else if (ftwbuf->path[0] == '/') { + buf = "/"; + } else { + buf = "."; + } + + if (!buf) { + return -1; + } + + int ret; + if (should_color(cfile, directive)) { + ret = cfprintf(cfile, "${di}%s${rs}", buf); + } else { + ret = fprintf(cfile->file, directive->str, buf); + } + + free(copy); + return ret; +} + +/** %H: current root */ +static int bfs_printf_H(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + if (should_color(cfile, directive)) { + if (ftwbuf->depth == 0) { + return cfprintf(cfile, "%pP", ftwbuf); + } else { + return cfprintf(cfile, "${di}%s${rs}", ftwbuf->root); + } + } else { + return fprintf(cfile->file, directive->str, ftwbuf->root); + } +} + +/** %i: inode */ +static int bfs_printf_i(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->ino); + return fprintf(cfile->file, directive->str, buf); +} + +/** %k: 1K blocks */ +static int bfs_printf_k(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + uintmax_t blocks = ((uintmax_t)statbuf->blocks*BFS_STAT_BLKSIZE + 1023)/1024; + BFS_PRINTF_BUF(buf, "%ju", blocks); + return fprintf(cfile->file, directive->str, buf); +} + +/** %l: link target */ +static int bfs_printf_l(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + char *buf = NULL; + const char *target = ""; + + if (ftwbuf->type == BFS_LNK) { + if (should_color(cfile, directive)) { + return cfprintf(cfile, "%pL", ftwbuf); + } + + const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW); + size_t len = statbuf ? statbuf->size : 0; + + target = buf = xreadlinkat(ftwbuf->at_fd, ftwbuf->at_path, len); + if (!target) { + return -1; + } + } + + int ret = fprintf(cfile->file, directive->str, target); + free(buf); + return ret; +} + +/** %m: mode */ +static int bfs_printf_m(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + return fprintf(cfile->file, directive->str, (unsigned int)(statbuf->mode & 07777)); +} + +/** %M: symbolic mode */ +static int bfs_printf_M(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + char buf[11]; + xstrmode(statbuf->mode, buf); + return fprintf(cfile->file, directive->str, buf); +} + +/** %n: link count */ +static int bfs_printf_n(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->nlink); + return fprintf(cfile->file, directive->str, buf); +} + +/** %p: full path */ +static int bfs_printf_p(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + if (should_color(cfile, directive)) { + return cfprintf(cfile, "%pP", ftwbuf); + } else { + return fprintf(cfile->file, directive->str, ftwbuf->path); + } +} + +/** %P: path after root */ +static int bfs_printf_P(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + size_t offset = strlen(ftwbuf->root); + if (ftwbuf->path[offset] == '/') { + ++offset; + } + + if (should_color(cfile, directive)) { + if (ftwbuf->depth == 0) { + return 0; + } + + struct BFTW copybuf = *ftwbuf; + copybuf.path += offset; + copybuf.nameoff -= offset; + return cfprintf(cfile, "%pP", ©buf); + } else { + return fprintf(cfile->file, directive->str, ftwbuf->path + offset); + } +} + +/** %s: size */ +static int bfs_printf_s(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->size); + return fprintf(cfile->file, directive->str, buf); +} + +/** %S: sparseness */ +static int bfs_printf_S(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + double sparsity; + if (statbuf->size == 0 && statbuf->blocks == 0) { + sparsity = 1.0; + } else { + sparsity = (double)BFS_STAT_BLKSIZE*statbuf->blocks/statbuf->size; + } + return fprintf(cfile->file, directive->str, sparsity); +} + +/** %U: uid */ +static int bfs_printf_U(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->uid); + return fprintf(cfile->file, directive->str, buf); +} + +/** %u: user name */ +static int bfs_printf_u(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + const struct bfs_users *users = directive->ptr; + const struct passwd *pwd = users ? bfs_getpwuid(users, statbuf->uid) : NULL; + if (!pwd) { + return bfs_printf_U(cfile, directive, ftwbuf); + } + + return fprintf(cfile->file, directive->str, pwd->pw_name); +} + +static const char *bfs_printf_type(enum bfs_type type) { + switch (type) { + case BFS_BLK: + return "b"; + case BFS_CHR: + return "c"; + case BFS_DIR: + return "d"; + case BFS_DOOR: + return "D"; + case BFS_FIFO: + return "p"; + case BFS_LNK: + return "l"; + case BFS_REG: + return "f"; + case BFS_SOCK: + return "s"; + default: + return "U"; + } +} + +/** %y: type */ +static int bfs_printf_y(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + const char *type = bfs_printf_type(ftwbuf->type); + return fprintf(cfile->file, directive->str, type); +} + +/** %Y: target type */ +static int bfs_printf_Y(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { + int error = 0; + + if (ftwbuf->type != BFS_LNK) { + return bfs_printf_y(cfile, directive, ftwbuf); + } + + const char *type = "U"; + + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, BFS_STAT_FOLLOW); + if (statbuf) { + type = bfs_printf_type(bfs_mode_to_type(statbuf->mode)); + } else { + switch (errno) { + case ELOOP: + type = "L"; + break; + case ENOENT: + case ENOTDIR: + type = "N"; + break; + default: + type = "?"; + error = errno; + break; + } + } + + int ret = fprintf(cfile->file, directive->str, type); + if (error != 0) { + ret = -1; + errno = error; + } + return ret; +} + +/** + * Append a literal string to the chain. + */ +static int append_literal(const struct bfs_ctx *ctx, struct bfs_printf **format, char **literal) { + if (dstrlen(*literal) == 0) { + return 0; + } + + struct bfs_printf directive = { + .fn = bfs_printf_literal, + .str = *literal, + }; + + if (DARRAY_PUSH(format, &directive) != 0) { + bfs_perror(ctx, "DARRAY_PUSH()"); + return -1; + } + + *literal = dstralloc(0); + if (!*literal) { + bfs_perror(ctx, "dstralloc()"); + return -1; + } + + return 0; +} + +/** + * Append a printf directive to the chain. + */ +static int append_directive(const struct bfs_ctx *ctx, struct bfs_printf **format, char **literal, struct bfs_printf *directive) { + if (append_literal(ctx, format, literal) != 0) { + return -1; + } + + if (DARRAY_PUSH(format, directive) != 0) { + bfs_perror(ctx, "DARRAY_PUSH()"); + return -1; + } + + return 0; +} + +int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const char *format) { + expr->printf = NULL; + + char *literal = dstralloc(0); + if (!literal) { + bfs_perror(ctx, "dstralloc()"); + goto error; + } + + for (const char *i = format; *i; ++i) { + char c = *i; + + if (c == '\\') { + c = *++i; + + if (c >= '0' && c < '8') { + c = 0; + for (int j = 0; j < 3 && *i >= '0' && *i < '8'; ++i, ++j) { + c *= 8; + c += *i - '0'; + } + --i; + goto one_char; + } + + switch (c) { + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case '\\': c = '\\'; break; + + case 'c': + { + struct bfs_printf directive = { + .fn = bfs_printf_flush, + }; + if (append_directive(ctx, &expr->printf, &literal, &directive) != 0) { + goto error; + } + goto done; + } + + case '\0': + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Incomplete escape sequence '\\'.\n"); + goto error; + + default: + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Unrecognized escape sequence '\\%c'.\n", c); + goto error; + } + } else if (c == '%') { + if (i[1] == '%') { + c = *++i; + goto one_char; + } + + struct bfs_printf directive = { + .str = dstralloc(2), + }; + if (!directive.str) { + goto directive_error; + } + if (dstrapp(&directive.str, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto directive_error; + } + + const char *specifier = "s"; + + // Parse any flags + bool must_be_numeric = false; + while (true) { + c = *++i; + + switch (c) { + case '#': + case '0': + case '+': + must_be_numeric = true; + BFS_FALLTHROUGH; + case ' ': + case '-': + if (strchr(directive.str, c)) { + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Duplicate flag '%c'.\n", c); + goto directive_error; + } + if (dstrapp(&directive.str, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto directive_error; + } + continue; + } + + break; + } + + // Parse the field width + while (c >= '0' && c <= '9') { + if (dstrapp(&directive.str, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto directive_error; + } + c = *++i; + } + + // Parse the precision + if (c == '.') { + do { + if (dstrapp(&directive.str, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto directive_error; + } + c = *++i; + } while (c >= '0' && c <= '9'); + } + + switch (c) { + case 'a': + directive.fn = bfs_printf_ctime; + directive.stat_field = BFS_STAT_ATIME; + break; + case 'b': + directive.fn = bfs_printf_b; + break; + case 'c': + directive.fn = bfs_printf_ctime; + directive.stat_field = BFS_STAT_CTIME; + break; + case 'd': + directive.fn = bfs_printf_d; + specifier = "jd"; + break; + case 'D': + directive.fn = bfs_printf_D; + break; + case 'f': + directive.fn = bfs_printf_f; + break; + case 'F': + directive.ptr = bfs_ctx_mtab(ctx); + if (!directive.ptr) { + int error = errno; + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Couldn't parse the mount table: %s.\n", strerror(error)); + goto directive_error; + } + directive.fn = bfs_printf_F; + break; + case 'g': + directive.ptr = bfs_ctx_groups(ctx); + if (!directive.ptr) { + int error = errno; + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Couldn't parse the group table: %s.\n", strerror(error)); + goto directive_error; + } + directive.fn = bfs_printf_g; + break; + case 'G': + directive.fn = bfs_printf_G; + break; + case 'h': + directive.fn = bfs_printf_h; + break; + case 'H': + directive.fn = bfs_printf_H; + break; + case 'i': + directive.fn = bfs_printf_i; + break; + case 'k': + directive.fn = bfs_printf_k; + break; + case 'l': + directive.fn = bfs_printf_l; + break; + case 'm': + directive.fn = bfs_printf_m; + specifier = "o"; + break; + case 'M': + directive.fn = bfs_printf_M; + break; + case 'n': + directive.fn = bfs_printf_n; + break; + case 'p': + directive.fn = bfs_printf_p; + break; + case 'P': + directive.fn = bfs_printf_P; + break; + case 's': + directive.fn = bfs_printf_s; + break; + case 'S': + directive.fn = bfs_printf_S; + specifier = "g"; + break; + case 't': + directive.fn = bfs_printf_ctime; + directive.stat_field = BFS_STAT_MTIME; + break; + case 'u': + directive.ptr = bfs_ctx_users(ctx); + if (!directive.ptr) { + int error = errno; + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Couldn't parse the user table: %s.\n", strerror(error)); + goto directive_error; + } + directive.fn = bfs_printf_u; + break; + case 'U': + directive.fn = bfs_printf_U; + break; + case 'w': + directive.fn = bfs_printf_ctime; + directive.stat_field = BFS_STAT_BTIME; + break; + case 'y': + directive.fn = bfs_printf_y; + break; + case 'Y': + directive.fn = bfs_printf_Y; + break; + + case 'A': + directive.stat_field = BFS_STAT_ATIME; + goto directive_strftime; + case 'B': + case 'W': + directive.stat_field = BFS_STAT_BTIME; + goto directive_strftime; + case 'C': + directive.stat_field = BFS_STAT_CTIME; + goto directive_strftime; + case 'T': + directive.stat_field = BFS_STAT_MTIME; + goto directive_strftime; + + directive_strftime: + directive.fn = bfs_printf_strftime; + c = *++i; + if (!c) { + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Incomplete time specifier '%s%c'.\n", directive.str, i[-1]); + goto directive_error; + } else if (strchr("%+@aAbBcCdDeFgGhHIjklmMnprRsStTuUVwWxXyYzZ", c)) { + directive.c = c; + } else { + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Unrecognized time specifier '%%%c%c'.\n", i[-1], c); + goto directive_error; + } + break; + + case '\0': + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Incomplete format specifier '%s'.\n", directive.str); + goto directive_error; + + default: + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Unrecognized format specifier '%%%c'.\n", c); + goto directive_error; + } + + if (must_be_numeric && strcmp(specifier, "s") == 0) { + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Invalid flags '%s' for string format '%%%c'.\n", directive.str + 1, c); + goto directive_error; + } + + if (dstrcat(&directive.str, specifier) != 0) { + bfs_perror(ctx, "dstrcat()"); + goto directive_error; + } + + if (append_directive(ctx, &expr->printf, &literal, &directive) != 0) { + goto directive_error; + } + + continue; + + directive_error: + dstrfree(directive.str); + goto error; + } + + one_char: + if (dstrapp(&literal, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto error; + } + } + +done: + if (append_literal(ctx, &expr->printf, &literal) != 0) { + goto error; + } + dstrfree(literal); + return 0; + +error: + dstrfree(literal); + bfs_printf_free(expr->printf); + expr->printf = NULL; + return -1; +} + +int bfs_printf(CFILE *cfile, const struct bfs_printf *format, const struct BFTW *ftwbuf) { + int ret = 0, error = 0; + + for (size_t i = 0; i < darray_length(format); ++i) { + const struct bfs_printf *directive = &format[i]; + if (directive->fn(cfile, directive, ftwbuf) < 0) { + ret = -1; + error = errno; + } + } + + errno = error; + return ret; +} + +void bfs_printf_free(struct bfs_printf *format) { + for (size_t i = 0; i < darray_length(format); ++i) { + dstrfree(format[i].str); + } + darray_free(format); +} diff --git a/src/printf.h b/src/printf.h new file mode 100644 index 0000000..a8c5f2a --- /dev/null +++ b/src/printf.h @@ -0,0 +1,68 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2017-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Implementation of -printf/-fprintf. + */ + +#ifndef BFS_PRINTF_H +#define BFS_PRINTF_H + +#include "color.h" + +struct BFTW; +struct bfs_ctx; +struct bfs_expr; + +/** + * A printf command, the result of parsing a single format string. + */ +struct bfs_printf; + +/** + * Parse a -printf format string. + * + * @param ctx + * The bfs context. + * @param expr + * The expression to fill in. + * @param format + * The format string to parse. + * @return + * 0 on success, -1 on failure. + */ +int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const char *format); + +/** + * Evaluate a parsed format string. + * + * @param cfile + * The CFILE to print to. + * @param format + * The parsed printf format. + * @param ftwbuf + * The bftw() data for the current file. + * @return + * 0 on success, -1 on failure. + */ +int bfs_printf(CFILE *cfile, const struct bfs_printf *format, const struct BFTW *ftwbuf); + +/** + * Free a parsed format string. + */ +void bfs_printf_free(struct bfs_printf *format); + +#endif // BFS_PRINTF_H diff --git a/src/pwcache.c b/src/pwcache.c new file mode 100644 index 0000000..91435bd --- /dev/null +++ b/src/pwcache.c @@ -0,0 +1,293 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "pwcache.h" +#include "darray.h" +#include "trie.h" +#include <errno.h> +#include <grp.h> +#include <pwd.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +struct bfs_users { + /** The array of passwd entries. */ + struct passwd *entries; + /** A map from usernames to entries. */ + struct trie by_name; + /** A map from UIDs to entries. */ + struct trie by_uid; +}; + +struct bfs_users *bfs_users_parse(void) { + int error; + + struct bfs_users *users = malloc(sizeof(*users)); + if (!users) { + return NULL; + } + + users->entries = NULL; + trie_init(&users->by_name); + trie_init(&users->by_uid); + + setpwent(); + + while (true) { + errno = 0; + struct passwd *ent = getpwent(); + if (!ent) { + if (errno) { + error = errno; + goto fail_end; + } else { + break; + } + } + + if (DARRAY_PUSH(&users->entries, ent) != 0) { + error = errno; + goto fail_end; + } + + ent = users->entries + darray_length(users->entries) - 1; + ent->pw_name = strdup(ent->pw_name); + ent->pw_dir = strdup(ent->pw_dir); + ent->pw_shell = strdup(ent->pw_shell); + if (!ent->pw_name || !ent->pw_dir || !ent->pw_shell) { + error = ENOMEM; + goto fail_end; + } + } + + endpwent(); + + for (size_t i = 0; i < darray_length(users->entries); ++i) { + struct passwd *entry = &users->entries[i]; + struct trie_leaf *leaf = trie_insert_str(&users->by_name, entry->pw_name); + if (leaf) { + if (!leaf->value) { + leaf->value = entry; + } + } else { + error = errno; + goto fail_free; + } + + leaf = trie_insert_mem(&users->by_uid, &entry->pw_uid, sizeof(entry->pw_uid)); + if (leaf) { + if (!leaf->value) { + leaf->value = entry; + } + } else { + error = errno; + goto fail_free; + } + } + + return users; + +fail_end: + endpwent(); +fail_free: + bfs_users_free(users); + errno = error; + return NULL; +} + +const struct passwd *bfs_getpwnam(const struct bfs_users *users, const char *name) { + const struct trie_leaf *leaf = trie_find_str(&users->by_name, name); + if (leaf) { + return leaf->value; + } else { + return NULL; + } +} + +const struct passwd *bfs_getpwuid(const struct bfs_users *users, uid_t uid) { + const struct trie_leaf *leaf = trie_find_mem(&users->by_uid, &uid, sizeof(uid)); + if (leaf) { + return leaf->value; + } else { + return NULL; + } +} + +void bfs_users_free(struct bfs_users *users) { + if (users) { + trie_destroy(&users->by_uid); + trie_destroy(&users->by_name); + + for (size_t i = 0; i < darray_length(users->entries); ++i) { + struct passwd *entry = &users->entries[i]; + free(entry->pw_shell); + free(entry->pw_dir); + free(entry->pw_name); + } + darray_free(users->entries); + + free(users); + } +} + +struct bfs_groups { + /** The array of group entries. */ + struct group *entries; + /** A map from group names to entries. */ + struct trie by_name; + /** A map from GIDs to entries. */ + struct trie by_gid; +}; + +/** + * struct group::gr_mem isn't properly aligned on macOS, so do this to avoid + * ASAN warnings. + */ +static char *next_gr_mem(void **gr_mem) { + char *mem; + memcpy(&mem, *gr_mem, sizeof(mem)); + *gr_mem = (char *)*gr_mem + sizeof(mem); + return mem; +} + +struct bfs_groups *bfs_groups_parse(void) { + int error; + + struct bfs_groups *groups = malloc(sizeof(*groups)); + if (!groups) { + return NULL; + } + + groups->entries = NULL; + trie_init(&groups->by_name); + trie_init(&groups->by_gid); + + setgrent(); + + while (true) { + errno = 0; + struct group *ent = getgrent(); + if (!ent) { + if (errno) { + error = errno; + goto fail_end; + } else { + break; + } + } + + if (DARRAY_PUSH(&groups->entries, ent) != 0) { + error = errno; + goto fail_end; + } + ent = groups->entries + darray_length(groups->entries) - 1; + + void *members = ent->gr_mem; + ent->gr_mem = NULL; + + ent->gr_name = strdup(ent->gr_name); + if (!ent->gr_name) { + error = errno; + goto fail_end; + } + + for (char *mem = next_gr_mem(&members); mem; mem = next_gr_mem(&members)) { + char *dup = strdup(mem); + if (!dup) { + error = errno; + goto fail_end; + } + + if (DARRAY_PUSH(&ent->gr_mem, &dup) != 0) { + error = errno; + free(dup); + goto fail_end; + } + } + } + + endgrent(); + + for (size_t i = 0; i < darray_length(groups->entries); ++i) { + struct group *entry = &groups->entries[i]; + struct trie_leaf *leaf = trie_insert_str(&groups->by_name, entry->gr_name); + if (leaf) { + if (!leaf->value) { + leaf->value = entry; + } + } else { + error = errno; + goto fail_free; + } + + leaf = trie_insert_mem(&groups->by_gid, &entry->gr_gid, sizeof(entry->gr_gid)); + if (leaf) { + if (!leaf->value) { + leaf->value = entry; + } + } else { + error = errno; + goto fail_free; + } + } + + return groups; + +fail_end: + endgrent(); +fail_free: + bfs_groups_free(groups); + errno = error; + return NULL; +} + +const struct group *bfs_getgrnam(const struct bfs_groups *groups, const char *name) { + const struct trie_leaf *leaf = trie_find_str(&groups->by_name, name); + if (leaf) { + return leaf->value; + } else { + return NULL; + } +} + +const struct group *bfs_getgrgid(const struct bfs_groups *groups, gid_t gid) { + const struct trie_leaf *leaf = trie_find_mem(&groups->by_gid, &gid, sizeof(gid)); + if (leaf) { + return leaf->value; + } else { + return NULL; + } +} + +void bfs_groups_free(struct bfs_groups *groups) { + if (groups) { + trie_destroy(&groups->by_gid); + trie_destroy(&groups->by_name); + + for (size_t i = 0; i < darray_length(groups->entries); ++i) { + struct group *entry = &groups->entries[i]; + for (size_t j = 0; j < darray_length(entry->gr_mem); ++j) { + free(entry->gr_mem[j]); + } + darray_free(entry->gr_mem); + free(entry->gr_name); + } + darray_free(groups->entries); + + free(groups); + } +} diff --git a/src/pwcache.h b/src/pwcache.h new file mode 100644 index 0000000..f1a1db3 --- /dev/null +++ b/src/pwcache.h @@ -0,0 +1,117 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2020 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * A caching wrapper for /etc/{passwd,group}. + */ + +#ifndef BFS_PWCACHE_H +#define BFS_PWCACHE_H + +#include <grp.h> +#include <pwd.h> + +/** + * The user table. + */ +struct bfs_users; + +/** + * Parse the user table. + * + * @return + * The parsed user table, or NULL on failure. + */ +struct bfs_users *bfs_users_parse(void); + +/** + * Get a user entry by name. + * + * @param users + * The user table. + * @param name + * The username to look up. + * @return + * The matching user, or NULL if not found. + */ +const struct passwd *bfs_getpwnam(const struct bfs_users *users, const char *name); + +/** + * Get a user entry by ID. + * + * @param users + * The user table. + * @param uid + * The ID to look up. + * @return + * The matching user, or NULL if not found. + */ +const struct passwd *bfs_getpwuid(const struct bfs_users *users, uid_t uid); + +/** + * Free a user table. + * + * @param users + * The user table to free. + */ +void bfs_users_free(struct bfs_users *users); + +/** + * The group table. + */ +struct bfs_groups; + +/** + * Parse the group table. + * + * @return + * The parsed group table, or NULL on failure. + */ +struct bfs_groups *bfs_groups_parse(void); + +/** + * Get a group entry by name. + * + * @param groups + * The group table. + * @param name + * The group name to look up. + * @return + * The matching group, or NULL if not found. + */ +const struct group *bfs_getgrnam(const struct bfs_groups *groups, const char *name); + +/** + * Get a group entry by ID. + * + * @param groups + * The group table. + * @param uid + * The ID to look up. + * @return + * The matching group, or NULL if not found. + */ +const struct group *bfs_getgrgid(const struct bfs_groups *groups, gid_t gid); + +/** + * Free a group table. + * + * @param groups + * The group table to free. + */ +void bfs_groups_free(struct bfs_groups *groups); + +#endif // BFS_PWCACHE_H diff --git a/src/stat.c b/src/stat.c new file mode 100644 index 0000000..47e60b6 --- /dev/null +++ b/src/stat.c @@ -0,0 +1,376 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2018-2019 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "stat.h" +#include "util.h" +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> + +#if BFS_HAS_SYS_PARAM +# include <sys/param.h> +#endif + +#ifdef STATX_BASIC_STATS +# define HAVE_STATX true +#elif __linux__ +# include <linux/stat.h> +# include <sys/syscall.h> +# include <unistd.h> +#endif + +#if HAVE_STATX || defined(__NR_statx) +# define HAVE_BFS_STATX true +#endif + +#if __APPLE__ +# define st_atim st_atimespec +# define st_ctim st_ctimespec +# define st_mtim st_mtimespec +# define st_birthtim st_birthtimespec +#endif + +const char *bfs_stat_field_name(enum bfs_stat_field field) { + switch (field) { + case BFS_STAT_DEV: + return "device number"; + case BFS_STAT_INO: + return "inode nunmber"; + case BFS_STAT_TYPE: + return "type"; + case BFS_STAT_MODE: + return "mode"; + case BFS_STAT_NLINK: + return "link count"; + case BFS_STAT_GID: + return "group ID"; + case BFS_STAT_UID: + return "user ID"; + case BFS_STAT_SIZE: + return "size"; + case BFS_STAT_BLOCKS: + return "block count"; + case BFS_STAT_RDEV: + return "underlying device"; + case BFS_STAT_ATTRS: + return "attributes"; + case BFS_STAT_ATIME: + return "access time"; + case BFS_STAT_BTIME: + return "birth time"; + case BFS_STAT_CTIME: + return "change time"; + case BFS_STAT_MTIME: + return "modification time"; + } + + assert(!"Unrecognized stat field"); + return "???"; +} + +/** + * Check if we should retry a failed stat() due to a potentially broken link. + */ +static bool bfs_stat_retry(int ret, enum bfs_stat_flags flags) { + return ret != 0 + && (flags & (BFS_STAT_NOFOLLOW | BFS_STAT_TRYFOLLOW)) == BFS_STAT_TRYFOLLOW + && is_nonexistence_error(errno); +} + +/** + * Convert a struct stat to a struct bfs_stat. + */ +static void bfs_stat_convert(const struct stat *statbuf, struct bfs_stat *buf) { + buf->mask = 0; + + buf->dev = statbuf->st_dev; + buf->mask |= BFS_STAT_DEV; + + buf->ino = statbuf->st_ino; + buf->mask |= BFS_STAT_INO; + + buf->mode = statbuf->st_mode; + buf->mask |= BFS_STAT_TYPE | BFS_STAT_MODE; + + buf->nlink = statbuf->st_nlink; + buf->mask |= BFS_STAT_NLINK; + + buf->gid = statbuf->st_gid; + buf->mask |= BFS_STAT_GID; + + buf->uid = statbuf->st_uid; + buf->mask |= BFS_STAT_UID; + + buf->size = statbuf->st_size; + buf->mask |= BFS_STAT_SIZE; + + buf->blocks = statbuf->st_blocks; + buf->mask |= BFS_STAT_BLOCKS; + + buf->rdev = statbuf->st_rdev; + buf->mask |= BFS_STAT_RDEV; + +#if BSD + buf->attrs = statbuf->st_flags; + buf->mask |= BFS_STAT_ATTRS; +#endif + + buf->atime = statbuf->st_atim; + buf->mask |= BFS_STAT_ATIME; + + buf->ctime = statbuf->st_ctim; + buf->mask |= BFS_STAT_CTIME; + + buf->mtime = statbuf->st_mtim; + buf->mask |= BFS_STAT_MTIME; + +#if __APPLE__ || __FreeBSD__ || __NetBSD__ + buf->btime = statbuf->st_birthtim; + buf->mask |= BFS_STAT_BTIME; +#endif +} + +/** + * bfs_stat() implementation backed by stat(). + */ +static int bfs_stat_impl(int at_fd, const char *at_path, int at_flags, enum bfs_stat_flags flags, struct bfs_stat *buf) { + struct stat statbuf; + int ret = fstatat(at_fd, at_path, &statbuf, at_flags); + + if (bfs_stat_retry(ret, flags)) { + at_flags |= AT_SYMLINK_NOFOLLOW; + ret = fstatat(at_fd, at_path, &statbuf, at_flags); + } + + if (ret == 0) { + bfs_stat_convert(&statbuf, buf); + } + + return ret; +} + +#if HAVE_BFS_STATX + +/** + * Wrapper for the statx() system call, which had no glibc wrapper prior to 2.28. + */ +static int bfs_statx(int at_fd, const char *at_path, int at_flags, unsigned int mask, struct statx *buf) { +#if BFS_HAS_FEATURE(memory_sanitizer, false) + // -fsanitize=memory doesn't know about statx(), so tell it the memory + // got initialized + memset(buf, 0, sizeof(*buf)); +#endif + +#if HAVE_STATX + return statx(at_fd, at_path, at_flags, mask, buf); +#else + return syscall(__NR_statx, at_fd, at_path, at_flags, mask, buf); +#endif +} + +/** + * bfs_stat() implementation backed by statx(). + */ +static int bfs_statx_impl(int at_fd, const char *at_path, int at_flags, enum bfs_stat_flags flags, struct bfs_stat *buf) { + unsigned int mask = STATX_BASIC_STATS | STATX_BTIME; + struct statx xbuf; + int ret = bfs_statx(at_fd, at_path, at_flags, mask, &xbuf); + + if (bfs_stat_retry(ret, flags)) { + at_flags |= AT_SYMLINK_NOFOLLOW; + ret = bfs_statx(at_fd, at_path, at_flags, mask, &xbuf); + } + + if (ret != 0) { + return ret; + } + + // Callers shouldn't have to check anything except the times + const unsigned int guaranteed = STATX_BASIC_STATS ^ (STATX_ATIME | STATX_CTIME | STATX_MTIME); + if ((xbuf.stx_mask & guaranteed) != guaranteed) { + errno = ENOTSUP; + return -1; + } + + buf->mask = 0; + + buf->dev = bfs_makedev(xbuf.stx_dev_major, xbuf.stx_dev_minor); + buf->mask |= BFS_STAT_DEV; + + if (xbuf.stx_mask & STATX_INO) { + buf->ino = xbuf.stx_ino; + buf->mask |= BFS_STAT_INO; + } + + buf->mode = xbuf.stx_mode; + if (xbuf.stx_mask & STATX_TYPE) { + buf->mask |= BFS_STAT_TYPE; + } + if (xbuf.stx_mask & STATX_MODE) { + buf->mask |= BFS_STAT_MODE; + } + + if (xbuf.stx_mask & STATX_NLINK) { + buf->nlink = xbuf.stx_nlink; + buf->mask |= BFS_STAT_NLINK; + } + + if (xbuf.stx_mask & STATX_GID) { + buf->gid = xbuf.stx_gid; + buf->mask |= BFS_STAT_GID; + } + + if (xbuf.stx_mask & STATX_UID) { + buf->uid = xbuf.stx_uid; + buf->mask |= BFS_STAT_UID; + } + + if (xbuf.stx_mask & STATX_SIZE) { + buf->size = xbuf.stx_size; + buf->mask |= BFS_STAT_SIZE; + } + + if (xbuf.stx_mask & STATX_BLOCKS) { + buf->blocks = xbuf.stx_blocks; + buf->mask |= BFS_STAT_BLOCKS; + } + + buf->rdev = bfs_makedev(xbuf.stx_rdev_major, xbuf.stx_rdev_minor); + buf->mask |= BFS_STAT_RDEV; + + buf->attrs = xbuf.stx_attributes; + buf->mask |= BFS_STAT_ATTRS; + + if (xbuf.stx_mask & STATX_ATIME) { + buf->atime.tv_sec = xbuf.stx_atime.tv_sec; + buf->atime.tv_nsec = xbuf.stx_atime.tv_nsec; + buf->mask |= BFS_STAT_ATIME; + } + + if (xbuf.stx_mask & STATX_BTIME) { + buf->btime.tv_sec = xbuf.stx_btime.tv_sec; + buf->btime.tv_nsec = xbuf.stx_btime.tv_nsec; + buf->mask |= BFS_STAT_BTIME; + } + + if (xbuf.stx_mask & STATX_CTIME) { + buf->ctime.tv_sec = xbuf.stx_ctime.tv_sec; + buf->ctime.tv_nsec = xbuf.stx_ctime.tv_nsec; + buf->mask |= BFS_STAT_CTIME; + } + + if (xbuf.stx_mask & STATX_MTIME) { + buf->mtime.tv_sec = xbuf.stx_mtime.tv_sec; + buf->mtime.tv_nsec = xbuf.stx_mtime.tv_nsec; + buf->mask |= BFS_STAT_MTIME; + } + + return ret; +} + +#endif // HAVE_BFS_STATX + +/** + * Allows calling stat with custom at_flags. + */ +static int bfs_stat_explicit(int at_fd, const char *at_path, int at_flags, enum bfs_stat_flags flags, struct bfs_stat *buf) { +#if HAVE_BFS_STATX + static bool has_statx = true; + + if (has_statx) { + int ret = bfs_statx_impl(at_fd, at_path, at_flags, flags, buf); + // EPERM is commonly returned in a seccomp() sandbox that does + // not allow statx() + if (ret != 0 && (errno == ENOSYS || errno == EPERM)) { + has_statx = false; + } else { + return ret; + } + } +#endif + + return bfs_stat_impl(at_fd, at_path, at_flags, flags, buf); +} + +int bfs_stat(int at_fd, const char *at_path, enum bfs_stat_flags flags, struct bfs_stat *buf) { + int at_flags = 0; + if (flags & BFS_STAT_NOFOLLOW) { + at_flags |= AT_SYMLINK_NOFOLLOW; + } + +#ifdef AT_STATX_DONT_SYNC + if (flags & BFS_STAT_NOSYNC) { + at_flags |= AT_STATX_DONT_SYNC; + } +#endif + + if (at_path) { + return bfs_stat_explicit(at_fd, at_path, at_flags, flags, buf); + } + + // Check __GNU__ to work around https://lists.gnu.org/archive/html/bug-hurd/2021-12/msg00001.html +#if defined(AT_EMPTY_PATH) && !__GNU__ + static bool has_at_ep = true; + if (has_at_ep) { + at_flags |= AT_EMPTY_PATH; + int ret = bfs_stat_explicit(at_fd, "", at_flags, flags, buf); + if (ret != 0 && errno == EINVAL) { + has_at_ep = false; + } else { + return ret; + } + } +#endif + + struct stat statbuf; + if (fstat(at_fd, &statbuf) == 0) { + bfs_stat_convert(&statbuf, buf); + return 0; + } else { + return -1; + } +} + +const struct timespec *bfs_stat_time(const struct bfs_stat *buf, enum bfs_stat_field field) { + if (!(buf->mask & field)) { + errno = ENOTSUP; + return NULL; + } + + switch (field) { + case BFS_STAT_ATIME: + return &buf->atime; + case BFS_STAT_BTIME: + return &buf->btime; + case BFS_STAT_CTIME: + return &buf->ctime; + case BFS_STAT_MTIME: + return &buf->mtime; + default: + assert(!"Invalid stat field for time"); + errno = EINVAL; + return NULL; + } +} + +void bfs_stat_id(const struct bfs_stat *buf, bfs_file_id *id) { + memcpy(*id, &buf->dev, sizeof(buf->dev)); + memcpy(*id + sizeof(buf->dev), &buf->ino, sizeof(buf->ino)); +} diff --git a/src/stat.h b/src/stat.h new file mode 100644 index 0000000..55c75e9 --- /dev/null +++ b/src/stat.h @@ -0,0 +1,155 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2018-2019 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * A facade over the stat() API that unifies some details that diverge between + * implementations, like the names of the timespec fields and the presence of + * file "birth" times. On new enough Linux kernels, the facade is backed by + * statx() instead, and so it exposes a similar interface with a mask for which + * fields were successfully returned. + */ + +#ifndef BFS_STAT_H +#define BFS_STAT_H + +#include "util.h" +#include <sys/types.h> +#include <time.h> + +#if BFS_HAS_SYS_PARAM +# include <sys/param.h> +#endif + +/** + * bfs_stat field bitmask. + */ +enum bfs_stat_field { + BFS_STAT_DEV = 1 << 0, + BFS_STAT_INO = 1 << 1, + BFS_STAT_TYPE = 1 << 2, + BFS_STAT_MODE = 1 << 3, + BFS_STAT_NLINK = 1 << 4, + BFS_STAT_GID = 1 << 5, + BFS_STAT_UID = 1 << 6, + BFS_STAT_SIZE = 1 << 7, + BFS_STAT_BLOCKS = 1 << 8, + BFS_STAT_RDEV = 1 << 9, + BFS_STAT_ATTRS = 1 << 10, + BFS_STAT_ATIME = 1 << 11, + BFS_STAT_BTIME = 1 << 12, + BFS_STAT_CTIME = 1 << 13, + BFS_STAT_MTIME = 1 << 14, +}; + +/** + * Get the human-readable name of a bfs_stat field. + */ +const char *bfs_stat_field_name(enum bfs_stat_field field); + +/** + * bfs_stat() flags. + */ +enum bfs_stat_flags { + /** Follow symlinks (the default). */ + BFS_STAT_FOLLOW = 0, + /** Never follow symlinks. */ + BFS_STAT_NOFOLLOW = 1 << 0, + /** Try to follow symlinks, but fall back to the link itself if broken. */ + BFS_STAT_TRYFOLLOW = 1 << 1, + /** Try to use cached values without synchronizing remote filesystems. */ + BFS_STAT_NOSYNC = 1 << 2, +}; + +#ifdef DEV_BSIZE +# define BFS_STAT_BLKSIZE DEV_BSIZE +#elif defined(S_BLKSIZE) +# define BFS_STAT_BLKSIZE S_BLKSIZE +#else +# define BFS_STAT_BLKSIZE 512 +#endif + +/** + * Facade over struct stat. + */ +struct bfs_stat { + /** Bitmask indicating filled fields. */ + enum bfs_stat_field mask; + + /** Device ID containing the file. */ + dev_t dev; + /** Inode number. */ + ino_t ino; + /** File type and access mode. */ + mode_t mode; + /** Number of hard links. */ + nlink_t nlink; + /** Owner group ID. */ + gid_t gid; + /** Owner user ID. */ + uid_t uid; + /** File size in bytes. */ + off_t size; + /** Number of disk blocks allocated (of size BFS_STAT_BLKSIZE). */ + blkcnt_t blocks; + /** The device ID represented by this file. */ + dev_t rdev; + + /** Attributes/flags set on the file. */ + unsigned long long attrs; + + /** Access time. */ + struct timespec atime; + /** Birth/creation time. */ + struct timespec btime; + /** Status change time. */ + struct timespec ctime; + /** Modification time. */ + struct timespec mtime; +}; + +/** + * Facade over fstatat(). + * + * @param at_fd + * The base file descriptor for the lookup. + * @param at_path + * The path to stat, relative to at_fd. Pass NULL to fstat() at_fd + * itself. + * @param flags + * Flags that affect the lookup. + * @param[out] buf + * A place to store the stat buffer, if successful. + * @return + * 0 on success, -1 on error. + */ +int bfs_stat(int at_fd, const char *at_path, enum bfs_stat_flags flags, struct bfs_stat *buf); + +/** + * Get a particular time field from a bfs_stat() buffer. + */ +const struct timespec *bfs_stat_time(const struct bfs_stat *buf, enum bfs_stat_field field); + +/** + * A unique ID for a file. + */ +typedef unsigned char bfs_file_id[sizeof(dev_t) + sizeof(ino_t)]; + +/** + * Compute a unique ID for a file. + */ +void bfs_stat_id(const struct bfs_stat *buf, bfs_file_id *id); + +#endif // BFS_STAT_H diff --git a/src/trie.c b/src/trie.c new file mode 100644 index 0000000..bae9acb --- /dev/null +++ b/src/trie.c @@ -0,0 +1,693 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2019 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * This is an implementation of a "qp trie," as documented at + * https://dotat.at/prog/qp/README.html + * + * An uncompressed trie over the dataset {AAAA, AADD, ABCD, DDAA, DDDD} would + * look like + * + * A A A A + * *--->*--->*--->*--->$ + * | | | D D + * | | +--->*--->$ + * | | B C D + * | +--->*--->*--->$ + * | D D A A + * +--->*--->*--->*--->$ + * | D D + * +--->*--->$ + * + * A compressed (PATRICIA) trie collapses internal nodes that have only a single + * child, like this: + * + * A A AA + * *--->*--->*---->$ + * | | | DD + * | | +---->$ + * | | BCD + * | +----->$ + * | DD AA + * +---->*---->$ + * | DD + * +---->$ + * + * The nodes can be compressed further by dropping the actual compressed + * sequences from the nodes, storing it only in the leaves. This is the + * technique applied in QP tries, and the crit-bit trees that inspired them + * (https://cr.yp.to/critbit.html). Only the index to test, and the values to + * branch on, need to be stored in each node. + * + * A A A + * 0--->1--->2--->AAAA + * | | | D + * | | +--->AADD + * | | B + * | +--->ABCD + * | D A + * +--->2--->DDAA + * | D + * +--->DDDD + * + * Nodes are represented very compactly. Rather than a dense array of children, + * a sparse array of only the non-NULL children directly follows the node in + * memory. A bitmap is used to track which children exist; the index of a child + * i is found by counting the number of bits below bit i that are set. A tag + * bit is used to tell pointers to internal nodes apart from pointers to leaves. + * + * This implementation tests a whole nibble (half byte/hex digit) at every + * branch, so the bitmap takes up 16 bits. The remainder of a machine word is + * used to hold the offset, which severely constrains its range on 32-bit + * platforms. As a workaround, we store relative instead of absolute offsets, + * and insert intermediate singleton "jump" nodes when necessary. + */ + +#include "trie.h" +#include "util.h" +#include <assert.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#if CHAR_BIT != 8 +# error "This trie implementation assumes 8-bit bytes." +#endif + +/** Number of bits for the sparse array bitmap, aka the range of a nibble. */ +#define BITMAP_BITS 16 +/** The number of remaining bits in a word, to hold the offset. */ +#define OFFSET_BITS (sizeof(size_t)*CHAR_BIT - BITMAP_BITS) +/** The highest representable offset (only 64k on a 32-bit architecture). */ +#define OFFSET_MAX (((size_t)1 << OFFSET_BITS) - 1) + +/** + * An internal node of the trie. + */ +struct trie_node { + /** + * A bitmap that hold which indices exist in the sparse children array. + * Bit i will be set if a child exists at logical index i, and its index + * into the array will be popcount(bitmap & ((1 << i) - 1)). + */ + size_t bitmap : BITMAP_BITS; + + /** + * The offset into the key in nibbles. This is relative to the parent + * node, to support offsets larger than OFFSET_MAX. + */ + size_t offset : OFFSET_BITS; + + /** + * Flexible array of children. Each pointer uses the lowest bit as a + * tag to distinguish internal nodes from leaves. This is safe as long + * as all dynamic allocations are aligned to more than a single byte. + */ + uintptr_t children[]; +}; + +/** Check if an encoded pointer is to a leaf. */ +static bool trie_is_leaf(uintptr_t ptr) { + return ptr & 1; +} + +/** Decode a pointer to a leaf. */ +static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) { + assert(trie_is_leaf(ptr)); + return (struct trie_leaf *)(ptr ^ 1); +} + +/** Encode a pointer to a leaf. */ +static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) { + uintptr_t ptr = (uintptr_t)leaf ^ 1; + assert(trie_is_leaf(ptr)); + return ptr; +} + +/** Decode a pointer to an internal node. */ +static struct trie_node *trie_decode_node(uintptr_t ptr) { + assert(!trie_is_leaf(ptr)); + return (struct trie_node *)ptr; +} + +/** Encode a pointer to an internal node. */ +static uintptr_t trie_encode_node(const struct trie_node *node) { + uintptr_t ptr = (uintptr_t)node; + assert(!trie_is_leaf(ptr)); + return ptr; +} + +void trie_init(struct trie *trie) { + trie->root = 0; +} + +/** Compute the popcount (Hamming weight) of a bitmap. */ +static unsigned int trie_popcount(unsigned int n) { +#if __POPCNT__ + // Use the x86 instruction if we have it. Otherwise, GCC generates a + // library call, so use the below implementation instead. + return __builtin_popcount(n); +#else + // See https://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation + n -= (n >> 1) & 0x5555; + n = (n & 0x3333) + ((n >> 2) & 0x3333); + n = (n + (n >> 4)) & 0x0F0F; + n = (n + (n >> 8)) & 0xFF; + return n; +#endif +} + +/** Extract the nibble at a certain offset from a byte sequence. */ +static unsigned char trie_key_nibble(const void *key, size_t offset) { + const unsigned char *bytes = key; + size_t byte = offset >> 1; + + // A branchless version of + // if (offset & 1) { + // return bytes[byte] >> 4; + // } else { + // return bytes[byte] & 0xF; + // } + unsigned int shift = (offset & 1) << 2; + return (bytes[byte] >> shift) & 0xF; +} + +/** + * Finds a leaf in the trie that matches the key at every branch. If the key + * exists in the trie, the representative will match the searched key. But + * since only branch points are tested, it can be different from the key. In + * that case, the first mismatch between the key and the representative will be + * the depth at which to make a new branch to insert the key. + */ +static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) { + uintptr_t ptr = trie->root; + if (!ptr) { + return NULL; + } + + size_t offset = 0; + while (!trie_is_leaf(ptr)) { + struct trie_node *node = trie_decode_node(ptr); + offset += node->offset; + + unsigned int index = 0; + if ((offset >> 1) < length) { + unsigned char nibble = trie_key_nibble(key, offset); + unsigned int bit = 1U << nibble; + if (node->bitmap & bit) { + index = trie_popcount(node->bitmap & (bit - 1)); + } + } + ptr = node->children[index]; + } + + return trie_decode_leaf(ptr); +} + +struct trie_leaf *trie_first_leaf(const struct trie *trie) { + return trie_representative(trie, NULL, 0); +} + +struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) { + return trie_find_mem(trie, key, strlen(key) + 1); +} + +struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) { + struct trie_leaf *rep = trie_representative(trie, key, length); + if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) { + return rep; + } else { + return NULL; + } +} + +struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) { + size_t length = strlen(key); + struct trie_leaf *rep = trie_representative(trie, key, length + 1); + if (rep && rep->length >= length && memcmp(rep->key, key, length) == 0) { + return rep; + } else { + return NULL; + } +} + +/** + * Find a leaf that may end at the current node. + */ +static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) { + // Finding a terminating NUL byte may take two nibbles + for (int i = 0; i < 2; ++i) { + if (!(node->bitmap & 1)) { + break; + } + + uintptr_t ptr = node->children[0]; + if (trie_is_leaf(ptr)) { + return trie_decode_leaf(ptr); + } else { + node = trie_decode_node(ptr); + } + } + + return NULL; +} + +/** Check if a leaf is a prefix of a search key. */ +static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *key, size_t length) { + if (leaf && leaf->length <= length) { + return memcmp(key + skip, leaf->key + skip, leaf->length - skip - 1) == 0; + } else { + return false; + } +} + +struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) { + uintptr_t ptr = trie->root; + if (!ptr) { + return NULL; + } + + struct trie_leaf *best = NULL; + size_t skip = 0; + size_t length = strlen(key) + 1; + + size_t offset = 0; + while (!trie_is_leaf(ptr)) { + struct trie_node *node = trie_decode_node(ptr); + offset += node->offset; + if ((offset >> 1) >= length) { + return best; + } + + struct trie_leaf *leaf = trie_terminal_leaf(node); + if (trie_check_prefix(leaf, skip, key, length)) { + best = leaf; + skip = offset >> 1; + } + + unsigned char nibble = trie_key_nibble(key, offset); + unsigned int bit = 1U << nibble; + if (node->bitmap & bit) { + unsigned int index = trie_popcount(node->bitmap & (bit - 1)); + ptr = node->children[index]; + } else { + return best; + } + } + + struct trie_leaf *leaf = trie_decode_leaf(ptr); + if (trie_check_prefix(leaf, skip, key, length)) { + best = leaf; + } + + return best; +} + +/** Create a new leaf, holding a copy of the given key. */ +static struct trie_leaf *new_trie_leaf(const void *key, size_t length) { + struct trie_leaf *leaf = malloc(BFS_FLEX_SIZEOF(struct trie_leaf, key, length)); + if (leaf) { + leaf->value = NULL; + leaf->length = length; + memcpy(leaf->key, key, length); + } + return leaf; +} + +/** Compute the size of a trie node with a certain number of children. */ +static size_t trie_node_size(unsigned int size) { + // Empty nodes aren't supported + assert(size > 0); + // Node size must be a power of two + assert((size & (size - 1)) == 0); + + return BFS_FLEX_SIZEOF(struct trie_node, children, size); +} + +/** Find the offset of the first nibble that differs between two keys. */ +static size_t trie_key_mismatch(const void *key1, const void *key2, size_t length) { + const unsigned char *bytes1 = key1; + const unsigned char *bytes2 = key2; + size_t i = 0; + size_t offset = 0; + const size_t chunk = sizeof(size_t); + + for (; i + chunk <= length; i += chunk) { + if (memcmp(bytes1 + i, bytes2 + i, chunk) != 0) { + break; + } + } + + for (; i < length; ++i) { + unsigned char b1 = bytes1[i], b2 = bytes2[i]; + if (b1 != b2) { + offset = (b1 & 0xF) == (b2 & 0xF); + break; + } + } + + offset |= i << 1; + return offset; +} + +/** + * Insert a key into a node. The node must not have a child in that position + * already. Effectively takes a subtrie like this: + * + * ptr + * | + * v X + * *--->... + * | Z + * +--->... + * + * and transforms it to: + * + * ptr + * | + * v X + * *--->... + * | Y + * +--->key + * | Z + * +--->... + */ +static struct trie_leaf *trie_node_insert(uintptr_t *ptr, const void *key, size_t length, size_t offset) { + struct trie_node *node = trie_decode_node(*ptr); + unsigned int size = trie_popcount(node->bitmap); + + // Double the capacity every power of two + if ((size & (size - 1)) == 0) { + node = realloc(node, trie_node_size(2*size)); + if (!node) { + return NULL; + } + *ptr = trie_encode_node(node); + } + + struct trie_leaf *leaf = new_trie_leaf(key, length); + if (!leaf) { + return NULL; + } + + unsigned char nibble = trie_key_nibble(key, offset); + unsigned int bit = 1U << nibble; + + // The child must not already be present + assert(!(node->bitmap & bit)); + node->bitmap |= bit; + + unsigned int index = trie_popcount(node->bitmap & (bit - 1)); + uintptr_t *child = &node->children[index]; + if (index < size) { + memmove(child + 1, child, (size - index)*sizeof(*child)); + } + *child = trie_encode_leaf(leaf); + return leaf; +} + +/** + * When the current offset exceeds OFFSET_MAX, insert "jump" nodes that bridge + * the gap. This function takes a subtrie like this: + * + * ptr + * | + * v + * *--->rep + * + * and changes it to: + * + * ptr ret + * | | + * v v + * *--->*--->rep + * + * so that a new key can be inserted like: + * + * ptr ret + * | | + * v v X + * *--->*--->rep + * | Y + * +--->key + */ +static uintptr_t *trie_jump(uintptr_t *ptr, const char *key, size_t *offset) { + // We only ever need to jump to leaf nodes, since internal nodes are + // guaranteed to be within OFFSET_MAX anyway + assert(trie_is_leaf(*ptr)); + + struct trie_node *node = malloc(trie_node_size(1)); + if (!node) { + return NULL; + } + + *offset += OFFSET_MAX; + node->offset = OFFSET_MAX; + + unsigned char nibble = trie_key_nibble(key, *offset); + node->bitmap = 1 << nibble; + + node->children[0] = *ptr; + *ptr = trie_encode_node(node); + return node->children; +} + +/** + * Split a node in the trie. Changes a subtrie like this: + * + * ptr + * | + * v + * *...>--->rep + * + * into this: + * + * ptr + * | + * v X + * *--->*...>--->rep + * | Y + * +--->key + */ +static struct trie_leaf *trie_split(uintptr_t *ptr, const void *key, size_t length, struct trie_leaf *rep, size_t offset, size_t mismatch) { + unsigned char key_nibble = trie_key_nibble(key, mismatch); + unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch); + assert(key_nibble != rep_nibble); + + struct trie_node *node = malloc(trie_node_size(2)); + if (!node) { + return NULL; + } + + struct trie_leaf *leaf = new_trie_leaf(key, length); + if (!leaf) { + free(node); + return NULL; + } + + node->bitmap = (1 << key_nibble) | (1 << rep_nibble); + + size_t delta = mismatch - offset; + if (!trie_is_leaf(*ptr)) { + struct trie_node *child = trie_decode_node(*ptr); + child->offset -= delta; + } + node->offset = delta; + + unsigned int key_index = key_nibble > rep_nibble; + node->children[key_index] = trie_encode_leaf(leaf); + node->children[key_index ^ 1] = *ptr; + *ptr = trie_encode_node(node); + return leaf; +} + +struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) { + return trie_insert_mem(trie, key, strlen(key) + 1); +} + +struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length) { + struct trie_leaf *rep = trie_representative(trie, key, length); + if (!rep) { + struct trie_leaf *leaf = new_trie_leaf(key, length); + if (leaf) { + trie->root = trie_encode_leaf(leaf); + } + return leaf; + } + + size_t limit = length < rep->length ? length : rep->length; + size_t mismatch = trie_key_mismatch(key, rep->key, limit); + if ((mismatch >> 1) >= length) { + return rep; + } + + size_t offset = 0; + uintptr_t *ptr = &trie->root; + while (!trie_is_leaf(*ptr)) { + struct trie_node *node = trie_decode_node(*ptr); + if (offset + node->offset > mismatch) { + break; + } + offset += node->offset; + + unsigned char nibble = trie_key_nibble(key, offset); + unsigned int bit = 1U << nibble; + if (node->bitmap & bit) { + assert(offset < mismatch); + unsigned int index = trie_popcount(node->bitmap & (bit - 1)); + ptr = &node->children[index]; + } else { + assert(offset == mismatch); + return trie_node_insert(ptr, key, length, offset); + } + } + + while (mismatch - offset > OFFSET_MAX) { + ptr = trie_jump(ptr, key, &offset); + if (!ptr) { + return NULL; + } + } + + return trie_split(ptr, key, length, rep, offset, mismatch); +} + +/** Free a chain of singleton nodes. */ +static void trie_free_singletons(uintptr_t ptr) { + while (!trie_is_leaf(ptr)) { + struct trie_node *node = trie_decode_node(ptr); + + // Make sure the bitmap is a power of two, i.e. it has just one child + assert((node->bitmap & (node->bitmap - 1)) == 0); + + ptr = node->children[0]; + free(node); + } + + free(trie_decode_leaf(ptr)); +} + +/** + * Try to collapse a two-child node like: + * + * parent child + * | | + * v v + * *----->*----->*----->leaf + * | + * +----->other + * + * into + * + * parent + * | + * v + * other + */ +static int trie_collapse_node(uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) { + uintptr_t other = parent_node->children[child_index ^ 1]; + if (!trie_is_leaf(other)) { + struct trie_node *other_node = trie_decode_node(other); + if (other_node->offset + parent_node->offset <= OFFSET_MAX) { + other_node->offset += parent_node->offset; + } else { + return -1; + } + } + + *parent = other; + free(parent_node); + return 0; +} + +void trie_remove(struct trie *trie, struct trie_leaf *leaf) { + uintptr_t *child = &trie->root; + uintptr_t *parent = NULL; + unsigned int child_bit = 0, child_index = 0; + size_t offset = 0; + while (!trie_is_leaf(*child)) { + struct trie_node *node = trie_decode_node(*child); + offset += node->offset; + assert((offset >> 1) < leaf->length); + + unsigned char nibble = trie_key_nibble(leaf->key, offset); + unsigned int bit = 1U << nibble; + unsigned int bitmap = node->bitmap; + assert(bitmap & bit); + unsigned int index = trie_popcount(bitmap & (bit - 1)); + + // Advance the parent pointer, unless this node had only one child + if (bitmap & (bitmap - 1)) { + parent = child; + child_bit = bit; + child_index = index; + } + + child = &node->children[index]; + } + + assert(trie_decode_leaf(*child) == leaf); + + if (!parent) { + trie_free_singletons(trie->root); + trie->root = 0; + return; + } + + struct trie_node *node = trie_decode_node(*parent); + child = node->children + child_index; + trie_free_singletons(*child); + + node->bitmap ^= child_bit; + unsigned int parent_size = trie_popcount(node->bitmap); + assert(parent_size > 0); + if (parent_size == 1 && trie_collapse_node(parent, node, child_index) == 0) { + return; + } + + if (child_index < parent_size) { + memmove(child, child + 1, (parent_size - child_index)*sizeof(*child)); + } + + if ((parent_size & (parent_size - 1)) == 0) { + node = realloc(node, trie_node_size(parent_size)); + if (node) { + *parent = trie_encode_node(node); + } + } +} + +/** Free an encoded pointer to a node. */ +static void free_trie_ptr(uintptr_t ptr) { + if (trie_is_leaf(ptr)) { + free(trie_decode_leaf(ptr)); + } else { + struct trie_node *node = trie_decode_node(ptr); + size_t size = trie_popcount(node->bitmap); + for (size_t i = 0; i < size; ++i) { + free_trie_ptr(node->children[i]); + } + free(node); + } +} + +void trie_destroy(struct trie *trie) { + if (trie->root) { + free_trie_ptr(trie->root); + } +} diff --git a/src/trie.h b/src/trie.h new file mode 100644 index 0000000..2d29ac7 --- /dev/null +++ b/src/trie.h @@ -0,0 +1,156 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2019 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#ifndef BFS_TRIE_H +#define BFS_TRIE_H + +#include <stddef.h> +#include <stdint.h> + +/** + * A trie that holds a set of fixed- or variable-length strings. + */ +struct trie { + uintptr_t root; +}; + +/** + * A leaf of a trie. + */ +struct trie_leaf { + /** + * An arbitrary value associated with this leaf. + */ + void *value; + + /** + * The length of the key in bytes. + */ + size_t length; + + /** + * The key itself, stored inline. + */ + char key[]; +}; + +/** + * Initialize an empty trie. + */ +void trie_init(struct trie *trie); + +/** + * Get the first (lexicographically earliest) leaf in the trie. + * + * @param trie + * The trie to search. + * @return + * The first leaf, or NULL if the trie is empty. + */ +struct trie_leaf *trie_first_leaf(const struct trie *trie); + +/** + * Find the leaf for a string key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @return + * The found leaf, or NULL if the key is not present. + */ +struct trie_leaf *trie_find_str(const struct trie *trie, const char *key); + +/** + * Find the leaf for a fixed-size key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @param length + * The length of the key in bytes. + * @return + * The found leaf, or NULL if the key is not present. + */ +struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length); + +/** + * Find the shortest leaf that starts with a given key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @return + * A leaf that starts with the given key, or NULL. + */ +struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key); + +/** + * Find the leaf that is the longest prefix of the given key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @return + * The longest prefix match for the given key, or NULL. + */ +struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key); + +/** + * Insert a string key into the trie. + * + * @param trie + * The trie to modify. + * @param key + * The key to insert. + * @return + * The inserted leaf, or NULL on failure. + */ +struct trie_leaf *trie_insert_str(struct trie *trie, const char *key); + +/** + * Insert a fixed-size key into the trie. + * + * @param trie + * The trie to modify. + * @param key + * The key to insert. + * @param length + * The length of the key in bytes. + * @return + * The inserted leaf, or NULL on failure. + */ +struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length); + +/** + * Remove a leaf from a trie. + * + * @param trie + * The trie to modify. + * @param leaf + * The leaf to remove. + */ +void trie_remove(struct trie *trie, struct trie_leaf *leaf); + +/** + * Destroy a trie and its contents. + */ +void trie_destroy(struct trie *trie); + +#endif // BFS_TRIE_H diff --git a/src/typo.c b/src/typo.c new file mode 100644 index 0000000..4012730 --- /dev/null +++ b/src/typo.c @@ -0,0 +1,176 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2016 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "typo.h" +#include <limits.h> +#include <stdlib.h> +#include <string.h> + +// Assume QWERTY layout for now +static const int key_coords[UCHAR_MAX][3] = { + ['`'] = { 0, 0, 0}, + ['~'] = { 0, 0, 1}, + ['1'] = { 3, 0, 0}, + ['!'] = { 3, 0, 1}, + ['2'] = { 6, 0, 0}, + ['@'] = { 6, 0, 1}, + ['3'] = { 9, 0, 0}, + ['#'] = { 9, 0, 1}, + ['4'] = {12, 0, 0}, + ['$'] = {12, 0, 1}, + ['5'] = {15, 0, 0}, + ['%'] = {15, 0, 1}, + ['6'] = {18, 0, 0}, + ['^'] = {18, 0, 1}, + ['7'] = {21, 0, 0}, + ['&'] = {21, 0, 1}, + ['8'] = {24, 0, 0}, + ['*'] = {24, 0, 1}, + ['9'] = {27, 0, 0}, + ['('] = {27, 0, 1}, + ['0'] = {30, 0, 0}, + [')'] = {30, 0, 1}, + ['-'] = {33, 0, 0}, + ['_'] = {33, 0, 1}, + ['='] = {36, 0, 0}, + ['+'] = {36, 0, 1}, + + ['\t'] = { 1, 3, 0}, + ['q'] = { 4, 3, 0}, + ['Q'] = { 4, 3, 1}, + ['w'] = { 7, 3, 0}, + ['W'] = { 7, 3, 1}, + ['e'] = {10, 3, 0}, + ['E'] = {10, 3, 1}, + ['r'] = {13, 3, 0}, + ['R'] = {13, 3, 1}, + ['t'] = {16, 3, 0}, + ['T'] = {16, 3, 1}, + ['y'] = {19, 3, 0}, + ['Y'] = {19, 3, 1}, + ['u'] = {22, 3, 0}, + ['U'] = {22, 3, 1}, + ['i'] = {25, 3, 0}, + ['I'] = {25, 3, 1}, + ['o'] = {28, 3, 0}, + ['O'] = {28, 3, 1}, + ['p'] = {31, 3, 0}, + ['P'] = {31, 3, 1}, + ['['] = {34, 3, 0}, + ['{'] = {34, 3, 1}, + [']'] = {37, 3, 0}, + ['}'] = {37, 3, 1}, + ['\\'] = {40, 3, 0}, + ['|'] = {40, 3, 1}, + + ['a'] = { 5, 6, 0}, + ['A'] = { 5, 6, 1}, + ['s'] = { 8, 6, 0}, + ['S'] = { 8, 6, 1}, + ['d'] = {11, 6, 0}, + ['D'] = {11, 6, 1}, + ['f'] = {14, 6, 0}, + ['F'] = {14, 6, 1}, + ['g'] = {17, 6, 0}, + ['G'] = {17, 6, 1}, + ['h'] = {20, 6, 0}, + ['H'] = {20, 6, 1}, + ['j'] = {23, 6, 0}, + ['J'] = {23, 6, 1}, + ['k'] = {26, 6, 0}, + ['K'] = {26, 6, 1}, + ['l'] = {29, 6, 0}, + ['L'] = {29, 6, 1}, + [';'] = {32, 6, 0}, + [':'] = {32, 6, 1}, + ['\''] = {35, 6, 0}, + ['"'] = {35, 6, 1}, + ['\n'] = {38, 6, 0}, + + ['z'] = { 6, 9, 0}, + ['Z'] = { 6, 9, 1}, + ['x'] = { 9, 9, 0}, + ['X'] = { 9, 9, 1}, + ['c'] = {12, 9, 0}, + ['C'] = {12, 9, 1}, + ['v'] = {15, 9, 0}, + ['V'] = {15, 9, 1}, + ['b'] = {18, 9, 0}, + ['B'] = {18, 9, 1}, + ['n'] = {21, 9, 0}, + ['N'] = {21, 9, 1}, + ['m'] = {24, 9, 0}, + ['M'] = {24, 9, 1}, + [','] = {27, 9, 0}, + ['<'] = {27, 9, 1}, + ['.'] = {30, 9, 0}, + ['>'] = {30, 9, 1}, + ['/'] = {33, 9, 0}, + ['?'] = {33, 9, 1}, + + [' '] = {18, 12, 0}, +}; + +static int char_distance(char a, char b) { + const int *ac = key_coords[(unsigned char)a], *bc = key_coords[(unsigned char)b]; + int ret = 0; + for (int i = 0; i < 3; ++i) { + ret += abs(ac[i] - bc[i]); + } + return ret; +} + +int typo_distance(const char *actual, const char *expected) { + // This is the Wagner-Fischer algorithm for Levenshtein distance, using + // Manhattan distance on the keyboard for individual characters. + + const int insert_cost = 12; + + size_t rows = strlen(actual) + 1; + size_t cols = strlen(expected) + 1; + + int arr0[cols], arr1[cols]; + int *row0 = arr0, *row1 = arr1; + + for (size_t j = 0; j < cols; ++j) { + row0[j] = insert_cost * j; + } + + for (size_t i = 1; i < rows; ++i) { + row1[0] = row0[0] + insert_cost; + + char a = actual[i - 1]; + for (size_t j = 1; j < cols; ++j) { + char b = expected[j - 1]; + int cost = row0[j - 1] + char_distance(a, b); + int del_cost = row0[j] + insert_cost; + if (del_cost < cost) { + cost = del_cost; + } + int ins_cost = row1[j - 1] + insert_cost; + if (ins_cost < cost) { + cost = ins_cost; + } + row1[j] = cost; + } + + int *tmp = row0; + row0 = row1; + row1 = tmp; + } + + return row0[cols - 1]; +} diff --git a/src/typo.h b/src/typo.h new file mode 100644 index 0000000..0347aae --- /dev/null +++ b/src/typo.h @@ -0,0 +1,31 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2016 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#ifndef BFS_TYPO_H +#define BFS_TYPO_H + +/** + * Find the "typo" distance between two strings. + * + * @param actual + * The actual string typed by the user. + * @param expected + * The expected valid string. + * @return The distance between the two strings. + */ +int typo_distance(const char *actual, const char *expected); + +#endif // BFS_TYPO_H diff --git a/src/util.c b/src/util.c new file mode 100644 index 0000000..a62e66c --- /dev/null +++ b/src/util.c @@ -0,0 +1,510 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2016-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "util.h" +#include "dstring.h" +#include "xregex.h" +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <langinfo.h> +#include <nl_types.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <wchar.h> + +#if BFS_HAS_SYS_PARAM +# include <sys/param.h> +#endif + +#if BFS_HAS_SYS_SYSMACROS +# include <sys/sysmacros.h> +#elif BFS_HAS_SYS_MKDEV +# include <sys/mkdev.h> +#endif + +#if BFS_HAS_UTIL +# include <util.h> +#endif + +char *xreadlinkat(int fd, const char *path, size_t size) { + ssize_t len; + char *name = NULL; + + if (size == 0) { + size = 64; + } else { + ++size; // NUL terminator + } + + while (true) { + char *new_name = realloc(name, size); + if (!new_name) { + goto error; + } + name = new_name; + + len = readlinkat(fd, path, name, size); + if (len < 0) { + goto error; + } else if ((size_t)len >= size) { + size *= 2; + } else { + break; + } + } + + name[len] = '\0'; + return name; + +error: + free(name); + return NULL; +} + +int dup_cloexec(int fd) { +#ifdef F_DUPFD_CLOEXEC + return fcntl(fd, F_DUPFD_CLOEXEC, 0); +#else + int ret = dup(fd); + if (ret < 0) { + return -1; + } + + if (fcntl(ret, F_SETFD, FD_CLOEXEC) == -1) { + close_quietly(ret); + return -1; + } + + return ret; +#endif +} + +int pipe_cloexec(int pipefd[2]) { +#if __linux__ || (BSD && !__APPLE__) + return pipe2(pipefd, O_CLOEXEC); +#else + if (pipe(pipefd) != 0) { + return -1; + } + + if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) == -1 || fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) == -1) { + close_quietly(pipefd[1]); + close_quietly(pipefd[0]); + return -1; + } + + return 0; +#endif +} + +/** Get the single character describing the given file type. */ +static char type_char(mode_t mode) { + switch (mode & S_IFMT) { + case S_IFREG: + return '-'; + case S_IFBLK: + return 'b'; + case S_IFCHR: + return 'c'; + case S_IFDIR: + return 'd'; + case S_IFLNK: + return 'l'; + case S_IFIFO: + return 'p'; + case S_IFSOCK: + return 's'; +#ifdef S_IFDOOR + case S_IFDOOR: + return 'D'; +#endif +#ifdef S_IFPORT + case S_IFPORT: + return 'P'; +#endif +#ifdef S_IFWHT + case S_IFWHT: + return 'w'; +#endif + } + + return '?'; +} + +void xstrmode(mode_t mode, char str[11]) { + strcpy(str, "----------"); + + str[0] = type_char(mode); + + if (mode & 00400) { + str[1] = 'r'; + } + if (mode & 00200) { + str[2] = 'w'; + } + if ((mode & 04100) == 04000) { + str[3] = 'S'; + } else if (mode & 04000) { + str[3] = 's'; + } else if (mode & 00100) { + str[3] = 'x'; + } + + if (mode & 00040) { + str[4] = 'r'; + } + if (mode & 00020) { + str[5] = 'w'; + } + if ((mode & 02010) == 02000) { + str[6] = 'S'; + } else if (mode & 02000) { + str[6] = 's'; + } else if (mode & 00010) { + str[6] = 'x'; + } + + if (mode & 00004) { + str[7] = 'r'; + } + if (mode & 00002) { + str[8] = 'w'; + } + if ((mode & 01001) == 01000) { + str[9] = 'T'; + } else if (mode & 01000) { + str[9] = 't'; + } else if (mode & 00001) { + str[9] = 'x'; + } +} + +const char *xbasename(const char *path) { + const char *i; + + // Skip trailing slashes + for (i = path + strlen(path); i > path && i[-1] == '/'; --i); + + // Find the beginning of the name + for (; i > path && i[-1] != '/'; --i); + + // Skip leading slashes + for (; i[0] == '/' && i[1]; ++i); + + return i; +} + +int xfaccessat(int fd, const char *path, int amode) { + int ret = faccessat(fd, path, amode, 0); + +#ifdef AT_EACCESS + // Some platforms, like Hurd, only support AT_EACCESS. Other platforms, + // like Android, don't support AT_EACCESS at all. + if (ret != 0 && (errno == EINVAL || errno == ENOTSUP)) { + ret = faccessat(fd, path, amode, AT_EACCESS); + } +#endif + + return ret; +} + +int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear) { +#if BSD && !__GNU__ + char *str_arg = (char *)*str; + unsigned long set_arg = 0; + unsigned long clear_arg = 0; + +#if __NetBSD__ + int ret = string_to_flags(&str_arg, &set_arg, &clear_arg); +#else + int ret = strtofflags(&str_arg, &set_arg, &clear_arg); +#endif + + *str = str_arg; + *set = set_arg; + *clear = clear_arg; + + if (ret != 0) { + errno = EINVAL; + } + return ret; +#else // !BSD + errno = ENOTSUP; + return -1; +#endif +} + +size_t xstrwidth(const char *str) { + size_t len = strlen(str); + size_t ret = 0; + + mbstate_t mb; + memset(&mb, 0, sizeof(mb)); + + while (len > 0) { + wchar_t wc; + size_t mblen = mbrtowc(&wc, str, len, &mb); + int cwidth; + if (mblen == (size_t)-1) { + // Invalid byte sequence, assume a single-width '?' + mblen = 1; + cwidth = 1; + memset(&mb, 0, sizeof(mb)); + } else if (mblen == (size_t)-2) { + // Incomplete byte sequence, assume a single-width '?' + mblen = len; + cwidth = 1; + } else { + cwidth = wcwidth(wc); + if (cwidth < 0) { + cwidth = 0; + } + } + + str += mblen; + len -= mblen; + ret += cwidth; + } + + return ret; +} + +bool is_nonexistence_error(int error) { + return error == ENOENT || errno == ENOTDIR; +} + +/** Compile and execute a regular expression for xrpmatch(). */ +static int xrpregex(nl_item item, const char *response) { + const char *pattern = nl_langinfo(item); + if (!pattern) { + return -1; + } + + struct bfs_regex *regex; + int ret = bfs_regcomp(®ex, pattern, BFS_REGEX_POSIX_EXTENDED, 0); + if (ret == 0) { + ret = bfs_regexec(regex, response, 0); + } + + bfs_regfree(regex); + return ret; +} + +/** Check if a response is affirmative or negative. */ +static int xrpmatch(const char *response) { + int ret = xrpregex(NOEXPR, response); + if (ret > 0) { + return 0; + } else if (ret < 0) { + return -1; + } + + ret = xrpregex(YESEXPR, response); + if (ret > 0) { + return 1; + } else if (ret < 0) { + return -1; + } + + // Failsafe: always handle y/n + char c = response[0]; + if (c == 'n' || c == 'N') { + return 0; + } else if (c == 'y' || c == 'Y') { + return 1; + } else { + return -1; + } +} + +int ynprompt(void) { + fflush(stderr); + + char *line = xgetdelim(stdin, '\n'); + int ret = line ? xrpmatch(line) : -1; + free(line); + return ret; +} + +dev_t bfs_makedev(int ma, int mi) { +#ifdef makedev + return makedev(ma, mi); +#else + return (ma << 8) | mi; +#endif +} + +int bfs_major(dev_t dev) { +#ifdef major + return major(dev); +#else + return dev >> 8; +#endif +} + +int bfs_minor(dev_t dev) { +#ifdef minor + return minor(dev); +#else + return dev & 0xFF; +#endif +} + +size_t xread(int fd, void *buf, size_t nbytes) { + size_t count = 0; + + while (count < nbytes) { + ssize_t ret = read(fd, (char *)buf + count, nbytes - count); + if (ret < 0) { + if (errno == EINTR) { + continue; + } else { + break; + } + } else if (ret == 0) { + // EOF + errno = 0; + break; + } else { + count += ret; + } + } + + return count; +} + +size_t xwrite(int fd, const void *buf, size_t nbytes) { + size_t count = 0; + + while (count < nbytes) { + ssize_t ret = write(fd, (const char *)buf + count, nbytes - count); + if (ret < 0) { + if (errno == EINTR) { + continue; + } else { + break; + } + } else if (ret == 0) { + // EOF? + errno = 0; + break; + } else { + count += ret; + } + } + + return count; +} + +char *xconfstr(int name) { + size_t len = confstr(name, NULL, 0); + if (len == 0) { + return NULL; + } + + char *str = malloc(len); + if (!str) { + return NULL; + } + + if (confstr(name, str, len) != len) { + free(str); + return NULL; + } + + return str; +} + +char *xgetdelim(FILE *file, char delim) { + char *chunk = NULL; + size_t n = 0; + ssize_t len = getdelim(&chunk, &n, delim, file); + if (len >= 0) { + if (chunk[len] == delim) { + chunk[len] = '\0'; + } + return chunk; + } else { + free(chunk); + if (!ferror(file)) { + errno = 0; + } + return NULL; + } +} + +FILE *xfopen(const char *path, int flags) { + char mode[4]; + + switch (flags & O_ACCMODE) { + case O_RDONLY: + strcpy(mode, "rb"); + break; + case O_WRONLY: + strcpy(mode, "wb"); + break; + case O_RDWR: + strcpy(mode, "r+b"); + break; + default: + assert(!"Invalid access mode"); + errno = EINVAL; + return NULL; + } + + if (flags & O_APPEND) { + mode[0] = 'a'; + } + + int fd; + if (flags & O_CREAT) { + fd = open(path, flags, 0666); + } else { + fd = open(path, flags); + } + + if (fd < 0) { + return NULL; + } + + FILE *ret = fdopen(fd, mode); + if (!ret) { + close_quietly(fd); + return NULL; + } + + return ret; +} + +int xclose(int fd) { + int ret = close(fd); + if (ret != 0) { + assert(errno != EBADF); + } + return ret; +} + +void close_quietly(int fd) { + int error = errno; + xclose(fd); + errno = error; +} diff --git a/src/util.h b/src/util.h new file mode 100644 index 0000000..b5c7d80 --- /dev/null +++ b/src/util.h @@ -0,0 +1,317 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2016-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Assorted utilities that don't belong anywhere else. + */ + +#ifndef BFS_UTIL_H +#define BFS_UTIL_H + +#include <fcntl.h> +#include <fnmatch.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <sys/types.h> + +// Some portability concerns + +#ifdef __has_feature +# define BFS_HAS_FEATURE(feature, fallback) __has_feature(feature) +#else +# define BFS_HAS_FEATURE(feature, fallback) fallback +#endif + +#ifdef __has_include +# define BFS_HAS_INCLUDE(header, fallback) __has_include(header) +#else +# define BFS_HAS_INCLUDE(header, fallback) fallback +#endif + +#ifdef __has_c_attribute +# define BFS_HAS_C_ATTRIBUTE(attr) __has_c_attribute(attr) +#else +# define BFS_HAS_C_ATTRIBUTE(attr) false +#endif + +#if __GNUC__ && defined(__has_attribute) +# define BFS_HAS_GNU_ATTRIBUTE(attr) __has_attribute(attr) +#else +# define BFS_HAS_GNU_ATTRIBUTE(attr) false +#endif + +#ifndef BFS_HAS_MNTENT +# define BFS_HAS_MNTENT BFS_HAS_INCLUDE(<mntent.h>, __GLIBC__) +#endif + +#ifndef BFS_HAS_SYS_ACL +# define BFS_HAS_SYS_ACL BFS_HAS_INCLUDE(<sys/acl.h>, true) +#endif + +#ifndef BFS_HAS_SYS_CAPABILITY +# define BFS_HAS_SYS_CAPABILITY BFS_HAS_INCLUDE(<sys/capability.h>, __linux__) +#endif + +#ifndef BFS_HAS_SYS_EXTATTR +# define BFS_HAS_SYS_EXTATTR BFS_HAS_INCLUDE(<sys/extattr.h>, __FreeBSD__) +#endif + +#ifndef BFS_HAS_SYS_MKDEV +# define BFS_HAS_SYS_MKDEV BFS_HAS_INCLUDE(<sys/mkdev.h>, false) +#endif + +#ifndef BFS_HAS_SYS_PARAM +# define BFS_HAS_SYS_PARAM BFS_HAS_INCLUDE(<sys/param.h>, true) +#endif + +#ifndef BFS_HAS_SYS_SYSMACROS +# define BFS_HAS_SYS_SYSMACROS BFS_HAS_INCLUDE(<sys/sysmacros.h>, __GLIBC__) +#endif + +#ifndef BFS_HAS_SYS_XATTR +# define BFS_HAS_SYS_XATTR BFS_HAS_INCLUDE(<sys/xattr.h>, __linux__) +#endif + +#ifndef BFS_HAS_UTIL +# define BFS_HAS_UTIL BFS_HAS_INCLUDE(<util.h>, __NetBSD__) +#endif + +#if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE) +# define FNM_CASEFOLD FNM_IGNORECASE +#endif + +#ifndef O_DIRECTORY +# define O_DIRECTORY 0 +#endif + +#if BFS_HAS_C_ATTRIBUTE(fallthrough) +# define BFS_FALLTHROUGH [[fallthrough]] +#elif BFS_HAS_GNU_ATTRIBUTE(fallthrough) +# define BFS_FALLTHROUGH __attribute__((fallthrough)) +#else +# define BFS_FALLTHROUGH ((void)0) +#endif + +/** + * Adds compiler warnings for bad printf()-style function calls, if supported. + */ +#if BFS_HAS_GNU_ATTRIBUTE(format) +# define BFS_FORMATTER(fmt, args) __attribute__((format(printf, fmt, args))) +#else +# define BFS_FORMATTER(fmt, args) +#endif + +// Lower bound on BFS_FLEX_SIZEOF() +#define BFS_FLEX_LB(type, member, length) (offsetof(type, member) + sizeof(((type *)NULL)->member[0]) * (length)) + +// Maximum macro for BFS_FLEX_SIZE() +#define BFS_FLEX_MAX(a, b) ((a) > (b) ? (a) : (b)) + +/** + * Computes the size of a struct containing a flexible array member of the given + * length. + * + * @param type + * The type of the struct containing the flexible array. + * @param member + * The name of the flexible array member. + * @param length + * The length of the flexible array. + */ +#define BFS_FLEX_SIZEOF(type, member, length) \ + (sizeof(type) <= BFS_FLEX_LB(type, member, 0) \ + ? BFS_FLEX_LB(type, member, length) \ + : BFS_FLEX_MAX(sizeof(type), BFS_FLEX_LB(type, member, length))) + +/** + * readlinkat() wrapper that dynamically allocates the result. + * + * @param fd + * The base directory descriptor. + * @param path + * The path to the link, relative to fd. + * @param size + * An estimate for the size of the link name (pass 0 if unknown). + * @return The target of the link, allocated with malloc(), or NULL on failure. + */ +char *xreadlinkat(int fd, const char *path, size_t size); + +/** + * Like dup(), but set the FD_CLOEXEC flag. + * + * @param fd + * The file descriptor to duplicate. + * @return A duplicated file descriptor, or -1 on failure. + */ +int dup_cloexec(int fd); + +/** + * Like pipe(), but set the FD_CLOEXEC flag. + * + * @param pipefd + * The array to hold the two file descriptors. + * @return 0 on success, -1 on failure. + */ +int pipe_cloexec(int pipefd[2]); + +/** + * Format a mode like ls -l (e.g. -rw-r--r--). + * + * @param mode + * The mode to format. + * @param str + * The string to hold the formatted mode. + */ +void xstrmode(mode_t mode, char str[11]); + +/** + * basename() variant that doesn't modify the input. + * + * @param path + * The path in question. + * @return A pointer into path at the base name offset. + */ +const char *xbasename(const char *path); + +/** + * Wrapper for faccessat() that handles some portability issues. + */ +int xfaccessat(int fd, const char *path, int amode); + +/** + * Portability wrapper for strtofflags(). + * + * @param str + * The string to parse. The pointee will be advanced to the first + * invalid position on error. + * @param set + * The flags that are set in the string. + * @param clear + * The flags that are cleared in the string. + * @return + * 0 on success, -1 on failure. + */ +int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear); + +/** + * wcswidth() variant that works on narrow strings. + * + * @param str + * The string to measure. + * @return + * The likely width of that string in a terminal. + */ +size_t xstrwidth(const char *str); + +/** + * Return whether an error code is due to a path not existing. + */ +bool is_nonexistence_error(int error); + +/** + * Process a yes/no prompt. + * + * @return 1 for yes, 0 for no, and -1 for unknown. + */ +int ynprompt(void); + +/** + * Portable version of makedev(). + */ +dev_t bfs_makedev(int ma, int mi); + +/** + * Portable version of major(). + */ +int bfs_major(dev_t dev); + +/** + * Portable version of minor(). + */ +int bfs_minor(dev_t dev); + +/** + * A safe version of read() that handles interrupted system calls and partial + * reads. + * + * @return + * The number of bytes read. A value != nbytes indicates an error + * (errno != 0) or end of file (errno == 0). + */ +size_t xread(int fd, void *buf, size_t nbytes); + +/** + * A safe version of write() that handles interrupted system calls and partial + * writes. + * + * @return + The number of bytes written. A value != nbytes indicates an error. + */ +size_t xwrite(int fd, const void *buf, size_t nbytes); + +/** + * Wrapper for confstr() that allocates with malloc(). + * + * @param name + * The ID of the confstr to look up. + * @return + * The value of the confstr, or NULL on failure. + */ +char *xconfstr(int name); + +/** + * Convenience wrapper for getdelim(). + * + * @param file + * The file to read. + * @param delim + * The delimiter character to split on. + * @return + * The read chunk (without the delimiter), allocated with malloc(). + * NULL is returned on error (errno != 0) or end of file (errno == 0). + */ +char *xgetdelim(FILE *file, char delim); + +/** + * fopen() variant that takes open() style flags. + * + * @param path + * The path to open. + * @param flags + * Flags to pass to open(). + */ +FILE *xfopen(const char *path, int flags); + +/** + * close() wrapper that asserts the file descriptor is valid. + * + * @param fd + * The file descriptor to close. + * @return + * 0 on success, or -1 on error. + */ +int xclose(int fd); + +/** + * close() variant that preserves errno. + * + * @param fd + * The file descriptor to close. + */ +void close_quietly(int fd); + +#endif // BFS_UTIL_H diff --git a/src/xregex.c b/src/xregex.c new file mode 100644 index 0000000..3c3cf35 --- /dev/null +++ b/src/xregex.c @@ -0,0 +1,301 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "xregex.h" +#include "util.h" +#include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#if BFS_WITH_ONIGURUMA +# include <langinfo.h> +# include <oniguruma.h> +#else +# include <regex.h> +#endif + +struct bfs_regex { +#if BFS_WITH_ONIGURUMA + unsigned char *pattern; + OnigRegex impl; + int err; + OnigErrorInfo einfo; +#else + regex_t impl; + int err; +#endif +}; + +#if BFS_WITH_ONIGURUMA +/** Get (and initialize) the appropriate encoding for the current locale. */ +static int bfs_onig_encoding(OnigEncoding *penc) { + static OnigEncoding enc = NULL; + if (enc) { + *penc = enc; + return ONIG_NORMAL; + } + + // Fall back to ASCII by default + enc = ONIG_ENCODING_ASCII; + + // Oniguruma has no locale support, so try to guess the right encoding + // from the current locale. + const char *charmap = nl_langinfo(CODESET); + if (charmap) { +#define BFS_MAP_ENCODING(name, value) \ + do { \ + if (strcmp(charmap, name) == 0) { \ + enc = value; \ + } \ + } while (0) +#define BFS_MAP_ENCODING2(name1, name2, value) \ + do { \ + BFS_MAP_ENCODING(name1, value); \ + BFS_MAP_ENCODING(name2, value); \ + } while (0) + + // These names were found with locale -m on Linux and FreeBSD +#define BFS_MAP_ISO_8859(n) \ + BFS_MAP_ENCODING2("ISO-8859-" #n, "ISO8859-" #n, ONIG_ENCODING_ISO_8859_ ## n) + + BFS_MAP_ISO_8859(1); + BFS_MAP_ISO_8859(2); + BFS_MAP_ISO_8859(3); + BFS_MAP_ISO_8859(4); + BFS_MAP_ISO_8859(5); + BFS_MAP_ISO_8859(6); + BFS_MAP_ISO_8859(7); + BFS_MAP_ISO_8859(8); + BFS_MAP_ISO_8859(9); + BFS_MAP_ISO_8859(10); + BFS_MAP_ISO_8859(11); + // BFS_MAP_ISO_8859(12); + BFS_MAP_ISO_8859(13); + BFS_MAP_ISO_8859(14); + BFS_MAP_ISO_8859(15); + BFS_MAP_ISO_8859(16); + + BFS_MAP_ENCODING("UTF-8", ONIG_ENCODING_UTF8); + +#define BFS_MAP_EUC(name) \ + BFS_MAP_ENCODING2("EUC-" #name, "euc" #name, ONIG_ENCODING_EUC_ ## name) + + BFS_MAP_EUC(JP); + BFS_MAP_EUC(TW); + BFS_MAP_EUC(KR); + BFS_MAP_EUC(CN); + + BFS_MAP_ENCODING2("SHIFT_JIS", "SJIS", ONIG_ENCODING_SJIS); + + // BFS_MAP_ENCODING("KOI-8", ONIG_ENCODING_KOI8); + BFS_MAP_ENCODING("KOI8-R", ONIG_ENCODING_KOI8_R); + + BFS_MAP_ENCODING("CP1251", ONIG_ENCODING_CP1251); + + BFS_MAP_ENCODING("GB18030", ONIG_ENCODING_BIG5); + } + + int ret = onig_initialize(&enc, 1); + if (ret != ONIG_NORMAL) { + enc = NULL; + } + *penc = enc; + return ret; +} +#endif + +int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags) { + struct bfs_regex *regex = *preg = malloc(sizeof(*regex)); + if (!regex) { + return -1; + } + +#if BFS_WITH_ONIGURUMA + // onig_error_code_to_str() says + // + // don't call this after the pattern argument of onig_new() is freed + // + // so make a defensive copy. + regex->pattern = (unsigned char *)strdup(pattern); + if (!regex->pattern) { + goto fail; + } + + regex->impl = NULL; + regex->err = ONIG_NORMAL; + + OnigSyntaxType *syntax = NULL; + switch (type) { + case BFS_REGEX_POSIX_BASIC: + syntax = ONIG_SYNTAX_POSIX_BASIC; + break; + case BFS_REGEX_POSIX_EXTENDED: + syntax = ONIG_SYNTAX_POSIX_EXTENDED; + break; + case BFS_REGEX_EMACS: + syntax = ONIG_SYNTAX_EMACS; + break; + case BFS_REGEX_GREP: + syntax = ONIG_SYNTAX_GREP; + break; + } + assert(syntax); + + OnigOptionType options = syntax->options; + if (flags & BFS_REGEX_ICASE) { + options |= ONIG_OPTION_IGNORECASE; + } + + OnigEncoding enc; + regex->err = bfs_onig_encoding(&enc); + if (regex->err != ONIG_NORMAL) { + return -1; + } + + const unsigned char *end = regex->pattern + strlen(pattern); + regex->err = onig_new(®ex->impl, regex->pattern, end, options, enc, syntax, ®ex->einfo); + if (regex->err != ONIG_NORMAL) { + return -1; + } +#else + int cflags = 0; + switch (type) { + case BFS_REGEX_POSIX_BASIC: + break; + case BFS_REGEX_POSIX_EXTENDED: + cflags |= REG_EXTENDED; + break; + default: + errno = EINVAL; + goto fail; + } + + if (flags & BFS_REGEX_ICASE) { + cflags |= REG_ICASE; + } + +#if BFS_HAS_FEATURE(memory_sanitizer, false) + // https://github.com/google/sanitizers/issues/1496 + memset(®ex->impl, 0, sizeof(regex->impl)); +#endif + + regex->err = regcomp(®ex->impl, pattern, cflags); + if (regex->err != 0) { + return -1; + } +#endif + + return 0; + +fail: + free(regex); + *preg = NULL; + return -1; +} + +int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) { + size_t len = strlen(str); + +#if BFS_WITH_ONIGURUMA + const unsigned char *ustr = (const unsigned char *)str; + const unsigned char *end = ustr + len; + + // The docs for onig_{match,search}() say + // + // Do not pass invalid byte string in the regex character encoding. + if (!onigenc_is_valid_mbc_string(onig_get_encoding(regex->impl), ustr, end)) { + return 0; + } + + int ret; + if (flags & BFS_REGEX_ANCHOR) { + ret = onig_match(regex->impl, ustr, end, ustr, NULL, ONIG_OPTION_DEFAULT); + } else { + ret = onig_search(regex->impl, ustr, end, ustr, end, NULL, ONIG_OPTION_DEFAULT); + } + + if (ret >= 0) { + if (flags & BFS_REGEX_ANCHOR) { + return (size_t)ret == len; + } else { + return 1; + } + } else if (ret == ONIG_MISMATCH) { + return 0; + } else { + regex->err = ret; + return -1; + } +#else + regmatch_t match = { + .rm_so = 0, + .rm_eo = len, + }; + + int eflags = 0; +#ifdef REG_STARTEND + eflags |= REG_STARTEND; +#endif + + int ret = regexec(®ex->impl, str, 1, &match, eflags); + if (ret == 0) { + if (flags & BFS_REGEX_ANCHOR) { + return match.rm_so == 0 && (size_t)match.rm_eo == len; + } else { + return 1; + } + } else if (ret == REG_NOMATCH) { + return 0; + } else { + regex->err = ret; + return -1; + } +#endif +} + +void bfs_regfree(struct bfs_regex *regex) { + if (regex) { +#if BFS_WITH_ONIGURUMA + onig_free(regex->impl); + free(regex->pattern); +#else + regfree(®ex->impl); +#endif + free(regex); + } +} + +char *bfs_regerror(const struct bfs_regex *regex) { + if (!regex) { + return strdup(strerror(ENOMEM)); + } + +#if BFS_WITH_ONIGURUMA + unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN); + if (str) { + onig_error_code_to_str(str, regex->err, ®ex->einfo); + } + return (char *)str; +#else + size_t len = regerror(regex->err, ®ex->impl, NULL, 0); + char *str = malloc(len); + if (str) { + regerror(regex->err, ®ex->impl, str, len); + } + return str; +#endif +} diff --git a/src/xregex.h b/src/xregex.h new file mode 100644 index 0000000..b2f56a5 --- /dev/null +++ b/src/xregex.h @@ -0,0 +1,97 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2022 Tavian Barnes <tavianator@tavianator.com> and bfs * + * contributors * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#ifndef BFS_XREGEX_H +#define BFS_XREGEX_H + +/** + * A compiled regular expression. + */ +struct bfs_regex; + +/** + * Regex syntax flavors. + */ +enum bfs_regex_type { + BFS_REGEX_POSIX_BASIC, + BFS_REGEX_POSIX_EXTENDED, + BFS_REGEX_EMACS, + BFS_REGEX_GREP, +}; + +/** + * Regex compilation flags. + */ +enum bfs_regcomp_flags { + /** Treat the regex case-insensitively. */ + BFS_REGEX_ICASE = 1 << 0, +}; + +/** + * Regex execution flags. + */ +enum bfs_regexec_flags { + /** Only treat matches of the entire string as successful. */ + BFS_REGEX_ANCHOR = 1 << 0, +}; + +/** + * Wrapper for regcomp() that supports additional regex types. + * + * @param[out] preg + * Will hold the compiled regex. + * @param pattern + * The regular expression to compile. + * @param type + * The regular expression syntax to use. + * @param flags + * Regex compilation flags. + * @return + * 0 on success, -1 on failure. + */ +int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags); + +/** + * Wrapper for regexec(). + * + * @param regex + * The regular expression to execute. + * @param str + * The string to match against. + * @param flags + * Regex execution flags. + * @return + * 1 for a match, 0 for no match, -1 on failure. + */ +int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags); + +/** + * Free a compiled regex. + */ +void bfs_regfree(struct bfs_regex *regex); + +/** + * Get a human-readable regex error message. + * + * @param regex + * The compiled regex. + * @return + * A human-readable description of the error, which should be free()'d. + */ +char *bfs_regerror(const struct bfs_regex *regex); + +#endif // BFS_XREGEX_H diff --git a/src/xspawn.c b/src/xspawn.c new file mode 100644 index 0000000..93c270a --- /dev/null +++ b/src/xspawn.c @@ -0,0 +1,318 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2018-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "xspawn.h" +#include "util.h" +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +/** + * Types of spawn actions. + */ +enum bfs_spawn_op { + BFS_SPAWN_CLOSE, + BFS_SPAWN_DUP2, + BFS_SPAWN_FCHDIR, + BFS_SPAWN_SETRLIMIT, +}; + +/** + * A spawn action. + */ +struct bfs_spawn_action { + struct bfs_spawn_action *next; + + enum bfs_spawn_op op; + int in_fd; + int out_fd; + int resource; + struct rlimit rlimit; +}; + +int bfs_spawn_init(struct bfs_spawn *ctx) { + ctx->flags = 0; + ctx->actions = NULL; + ctx->tail = &ctx->actions; + return 0; +} + +int bfs_spawn_destroy(struct bfs_spawn *ctx) { + struct bfs_spawn_action *action = ctx->actions; + while (action) { + struct bfs_spawn_action *next = action->next; + free(action); + action = next; + } + return 0; +} + +int bfs_spawn_setflags(struct bfs_spawn *ctx, enum bfs_spawn_flags flags) { + ctx->flags = flags; + return 0; +} + +/** Add a spawn action to the chain. */ +static struct bfs_spawn_action *bfs_spawn_add(struct bfs_spawn *ctx, enum bfs_spawn_op op) { + struct bfs_spawn_action *action = malloc(sizeof(*action)); + if (action) { + action->next = NULL; + action->op = op; + action->in_fd = -1; + action->out_fd = -1; + + *ctx->tail = action; + ctx->tail = &action->next; + } + return action; +} + +int bfs_spawn_addclose(struct bfs_spawn *ctx, int fd) { + if (fd < 0) { + errno = EBADF; + return -1; + } + + struct bfs_spawn_action *action = bfs_spawn_add(ctx, BFS_SPAWN_CLOSE); + if (action) { + action->out_fd = fd; + return 0; + } else { + return -1; + } +} + +int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd) { + if (oldfd < 0 || newfd < 0) { + errno = EBADF; + return -1; + } + + struct bfs_spawn_action *action = bfs_spawn_add(ctx, BFS_SPAWN_DUP2); + if (action) { + action->in_fd = oldfd; + action->out_fd = newfd; + return 0; + } else { + return -1; + } +} + +int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd) { + if (fd < 0) { + errno = EBADF; + return -1; + } + + struct bfs_spawn_action *action = bfs_spawn_add(ctx, BFS_SPAWN_FCHDIR); + if (action) { + action->in_fd = fd; + return 0; + } else { + return -1; + } +} + +int bfs_spawn_addsetrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl) { + struct bfs_spawn_action *action = bfs_spawn_add(ctx, BFS_SPAWN_SETRLIMIT); + if (action) { + action->resource = resource; + action->rlimit = *rl; + return 0; + } else { + return -1; + } +} + +/** Actually exec() the new process. */ +static void bfs_spawn_exec(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp, int pipefd[2]) { + int error; + const struct bfs_spawn_action *actions = ctx ? ctx->actions : NULL; + + xclose(pipefd[0]); + + for (const struct bfs_spawn_action *action = actions; action; action = action->next) { + // Move the error-reporting pipe out of the way if necessary... + if (action->out_fd == pipefd[1]) { + int fd = dup_cloexec(pipefd[1]); + if (fd < 0) { + goto fail; + } + xclose(pipefd[1]); + pipefd[1] = fd; + } + + // ... and pretend the pipe doesn't exist + if (action->in_fd == pipefd[1]) { + errno = EBADF; + goto fail; + } + + switch (action->op) { + case BFS_SPAWN_CLOSE: + if (close(action->out_fd) != 0) { + goto fail; + } + break; + case BFS_SPAWN_DUP2: + if (dup2(action->in_fd, action->out_fd) < 0) { + goto fail; + } + break; + case BFS_SPAWN_FCHDIR: + if (fchdir(action->in_fd) != 0) { + goto fail; + } + break; + case BFS_SPAWN_SETRLIMIT: + if (setrlimit(action->resource, &action->rlimit) != 0) { + goto fail; + } + break; + } + } + + execve(exe, argv, envp); + +fail: + error = errno; + + // In case of a write error, the parent will still see that we exited + // unsuccessfully, but won't know why + (void) xwrite(pipefd[1], &error, sizeof(error)); + + xclose(pipefd[1]); + _Exit(127); +} + +pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp) { + extern char **environ; + if (!envp) { + envp = environ; + } + + enum bfs_spawn_flags flags = ctx ? ctx->flags : 0; + char *resolved = NULL; + if (flags & BFS_SPAWN_USEPATH) { + exe = resolved = bfs_spawn_resolve(exe); + if (!resolved) { + return -1; + } + } + + // Use a pipe to report errors from the child + int pipefd[2]; + if (pipe_cloexec(pipefd) != 0) { + free(resolved); + return -1; + } + + pid_t pid = fork(); + if (pid < 0) { + close_quietly(pipefd[1]); + close_quietly(pipefd[0]); + free(resolved); + return -1; + } else if (pid == 0) { + // Child + bfs_spawn_exec(exe, ctx, argv, envp, pipefd); + } + + // Parent + xclose(pipefd[1]); + free(resolved); + + int error; + ssize_t nbytes = xread(pipefd[0], &error, sizeof(error)); + xclose(pipefd[0]); + if (nbytes == sizeof(error)) { + int wstatus; + waitpid(pid, &wstatus, 0); + errno = error; + return -1; + } + + return pid; +} + +char *bfs_spawn_resolve(const char *exe) { + if (strchr(exe, '/')) { + return strdup(exe); + } + + const char *path = getenv("PATH"); + + char *confpath = NULL; + if (!path) { + path = confpath = xconfstr(_CS_PATH); + if (!path) { + return NULL; + } + } + + size_t cap = 0; + char *ret = NULL; + while (true) { + const char *end = strchr(path, ':'); + size_t len = end ? (size_t)(end - path) : strlen(path); + + // POSIX 8.3: "A zero-length prefix is a legacy feature that + // indicates the current working directory." + if (len == 0) { + path = "."; + len = 1; + } + + size_t total = len + 1 + strlen(exe) + 1; + if (cap < total) { + char *grown = realloc(ret, total); + if (!grown) { + goto fail; + } + ret = grown; + cap = total; + } + + memcpy(ret, path, len); + if (ret[len - 1] != '/') { + ret[len++] = '/'; + } + strcpy(ret + len, exe); + + if (xfaccessat(AT_FDCWD, ret, X_OK) == 0) { + break; + } + + if (!end) { + errno = ENOENT; + goto fail; + } + + path = end + 1; + } + + free(confpath); + return ret; + +fail: + free(confpath); + free(ret); + return NULL; +} diff --git a/src/xspawn.h b/src/xspawn.h new file mode 100644 index 0000000..cd6a42e --- /dev/null +++ b/src/xspawn.h @@ -0,0 +1,123 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2018-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * A process-spawning library inspired by posix_spawn(). + */ + +#ifndef BFS_XSPAWN_H +#define BFS_XSPAWN_H + +#include <sys/resource.h> +#include <sys/types.h> + +/** + * bfs_spawn() flags. + */ +enum bfs_spawn_flags { + /** Use the PATH variable to resolve the executable (like execvp()). */ + BFS_SPAWN_USEPATH = 1 << 0, +}; + +/** + * bfs_spawn() attributes, controlling the context of the new process. + */ +struct bfs_spawn { + enum bfs_spawn_flags flags; + struct bfs_spawn_action *actions; + struct bfs_spawn_action **tail; +}; + +/** + * Create a new bfs_spawn() context. + * + * @return 0 on success, -1 on failure. + */ +int bfs_spawn_init(struct bfs_spawn *ctx); + +/** + * Destroy a bfs_spawn() context. + * + * @return 0 on success, -1 on failure. + */ +int bfs_spawn_destroy(struct bfs_spawn *ctx); + +/** + * Set the flags for a bfs_spawn() context. + * + * @return 0 on success, -1 on failure. + */ +int bfs_spawn_setflags(struct bfs_spawn *ctx, enum bfs_spawn_flags flags); + +/** + * Add a close() action to a bfs_spawn() context. + * + * @return 0 on success, -1 on failure. + */ +int bfs_spawn_addclose(struct bfs_spawn *ctx, int fd); + +/** + * Add a dup2() action to a bfs_spawn() context. + * + * @return 0 on success, -1 on failure. + */ +int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd); + +/** + * Add an fchdir() action to a bfs_spawn() context. + * + * @return 0 on success, -1 on failure. + */ +int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd); + +/** + * Add a setrlimit() action to a bfs_spawn() context. + * + * @return 0 on success, -1 on failure. + */ +int bfs_spawn_addsetrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl); + +/** + * Spawn a new process. + * + * @param exe + * The executable to run. + * @param ctx + * The context for the new process. + * @param argv + * The arguments for the new process. + * @param envp + * The environment variables for the new process (NULL for the current + * environment). + * @return + * The PID of the new process, or -1 on error. + */ +pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp); + +/** + * Look up an executable in the current PATH, as BFS_SPAWN_USEPATH or execvp() + * would do. + * + * @param exe + * The name of the binary to execute. Bare names without a '/' will be + * searched on the provided PATH. + * @return + * The full path to the executable, which should be free()'d, or NULL on + * failure. + */ +char *bfs_spawn_resolve(const char *exe); + +#endif // BFS_XSPAWN_H diff --git a/src/xtime.c b/src/xtime.c new file mode 100644 index 0000000..8ca963b --- /dev/null +++ b/src/xtime.c @@ -0,0 +1,323 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2020-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +#include "xtime.h" +#include <errno.h> +#include <limits.h> +#include <stdbool.h> +#include <stdlib.h> +#include <time.h> + +/** Whether tzset() has been called. */ +static bool tz_is_set = false; + +int xlocaltime(const time_t *timep, struct tm *result) { + // Should be called before localtime_r() according to POSIX.1-2004 + if (!tz_is_set) { + tzset(); + tz_is_set = true; + } + + if (localtime_r(timep, result)) { + return 0; + } else { + return -1; + } +} + +int xgmtime(const time_t *timep, struct tm *result) { + // Should be called before gmtime_r() according to POSIX.1-2004 + if (!tz_is_set) { + tzset(); + tz_is_set = true; + } + + if (gmtime_r(timep, result)) { + return 0; + } else { + return -1; + } +} + +int xmktime(struct tm *tm, time_t *timep) { + *timep = mktime(tm); + + if (*timep == -1) { + int error = errno; + + struct tm tmp; + if (xlocaltime(timep, &tmp) != 0) { + return -1; + } + + if (tm->tm_year != tmp.tm_year || tm->tm_yday != tmp.tm_yday + || tm->tm_hour != tmp.tm_hour || tm->tm_min != tmp.tm_min || tm->tm_sec != tmp.tm_sec) { + errno = error; + return -1; + } + } + + return 0; +} + +static int safe_add(int *value, int delta) { + if (*value >= 0) { + if (delta > INT_MAX - *value) { + return -1; + } + } else { + if (delta < INT_MIN - *value) { + return -1; + } + } + + *value += delta; + return 0; +} + +static int floor_div(int n, int d) { + int a = n < 0; + return (n + a)/d - a; +} + +static int wrap(int *value, int max, int *next) { + int carry = floor_div(*value, max); + *value -= carry * max; + return safe_add(next, carry); +} + +static int month_length(int year, int month) { + static const int month_lengths[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + int ret = month_lengths[month]; + if (month == 1 && year%4 == 0 && (year%100 != 0 || (year + 300)%400 == 0)) { + ++ret; + } + return ret; +} + +int xtimegm(struct tm *tm, time_t *timep) { + tm->tm_isdst = 0; + + if (wrap(&tm->tm_sec, 60, &tm->tm_min) != 0) { + goto overflow; + } + if (wrap(&tm->tm_min, 60, &tm->tm_hour) != 0) { + goto overflow; + } + if (wrap(&tm->tm_hour, 24, &tm->tm_mday) != 0) { + goto overflow; + } + + // In order to wrap the days of the month, we first need to know what + // month it is + if (wrap(&tm->tm_mon, 12, &tm->tm_year) != 0) { + goto overflow; + } + + if (tm->tm_mday < 1) { + do { + --tm->tm_mon; + if (wrap(&tm->tm_mon, 12, &tm->tm_year) != 0) { + goto overflow; + } + + tm->tm_mday += month_length(tm->tm_year, tm->tm_mon); + } while (tm->tm_mday < 1); + } else { + while (true) { + int days = month_length(tm->tm_year, tm->tm_mon); + if (tm->tm_mday <= days) { + break; + } + + tm->tm_mday -= days; + ++tm->tm_mon; + if (wrap(&tm->tm_mon, 12, &tm->tm_year) != 0) { + goto overflow; + } + } + } + + tm->tm_yday = 0; + for (int i = 0; i < tm->tm_mon; ++i) { + tm->tm_yday += month_length(tm->tm_year, i); + } + tm->tm_yday += tm->tm_mday - 1; + + int leap_days; + // Compute floor((year - 69)/4) - floor((year - 1)/100) + floor((year + 299)/400) without overflows + if (tm->tm_year >= 0) { + leap_days = floor_div(tm->tm_year - 69, 4) - floor_div(tm->tm_year - 1, 100) + floor_div(tm->tm_year - 101, 400) + 1; + } else { + leap_days = floor_div(tm->tm_year + 3, 4) - floor_div(tm->tm_year + 99, 100) + floor_div(tm->tm_year + 299, 400) - 17; + } + + long long epoch_days = 365LL*(tm->tm_year - 70) + leap_days + tm->tm_yday; + tm->tm_wday = (epoch_days + 4)%7; + if (tm->tm_wday < 0) { + tm->tm_wday += 7; + } + + long long epoch_time = tm->tm_sec + 60*(tm->tm_min + 60*(tm->tm_hour + 24*epoch_days)); + *timep = (time_t)epoch_time; + if ((long long)*timep != epoch_time) { + goto overflow; + } + return 0; + +overflow: + errno = EOVERFLOW; + return -1; +} + +/** Parse some digits from a timestamp. */ +static int parse_timestamp_part(const char **str, size_t n, int *result) { + char buf[n + 1]; + for (size_t i = 0; i < n; ++i, ++*str) { + char c = **str; + if (c < '0' || c > '9') { + return -1; + } + buf[i] = c; + } + buf[n] = '\0'; + + *result = atoi(buf); + return 0; +} + +int parse_timestamp(const char *str, struct timespec *result) { + struct tm tm = { + .tm_isdst = -1, + }; + + int tz_hour = 0; + int tz_min = 0; + bool tz_negative = false; + bool local = true; + + // YYYY + if (parse_timestamp_part(&str, 4, &tm.tm_year) != 0) { + goto invalid; + } + tm.tm_year -= 1900; + + // MM + if (*str == '-') { + ++str; + } + if (parse_timestamp_part(&str, 2, &tm.tm_mon) != 0) { + goto invalid; + } + tm.tm_mon -= 1; + + // DD + if (*str == '-') { + ++str; + } + if (parse_timestamp_part(&str, 2, &tm.tm_mday) != 0) { + goto invalid; + } + + if (!*str) { + goto end; + } else if (*str == 'T') { + ++str; + } + + // hh + if (parse_timestamp_part(&str, 2, &tm.tm_hour) != 0) { + goto invalid; + } + + // mm + if (!*str) { + goto end; + } else if (*str == ':') { + ++str; + } + if (parse_timestamp_part(&str, 2, &tm.tm_min) != 0) { + goto invalid; + } + + // ss + if (!*str) { + goto end; + } else if (*str == ':') { + ++str; + } + if (parse_timestamp_part(&str, 2, &tm.tm_sec) != 0) { + goto invalid; + } + + if (!*str) { + goto end; + } else if (*str == 'Z') { + local = false; + ++str; + } else if (*str == '+' || *str == '-') { + local = false; + tz_negative = *str == '-'; + ++str; + + // hh + if (parse_timestamp_part(&str, 2, &tz_hour) != 0) { + goto invalid; + } + + // mm + if (!*str) { + goto end; + } else if (*str == ':') { + ++str; + } + if (parse_timestamp_part(&str, 2, &tz_min) != 0) { + goto invalid; + } + } else { + goto invalid; + } + + if (*str) { + goto invalid; + } + +end: + if (local) { + if (xmktime(&tm, &result->tv_sec) != 0) { + goto error; + } + } else { + if (xtimegm(&tm, &result->tv_sec) != 0) { + goto error; + } + + int offset = 60*tz_hour + tz_min; + if (tz_negative) { + result->tv_sec -= offset; + } else { + result->tv_sec += offset; + } + } + + result->tv_nsec = 0; + return 0; + +invalid: + errno = EINVAL; +error: + return -1; +} diff --git a/src/xtime.h b/src/xtime.h new file mode 100644 index 0000000..ceff48f --- /dev/null +++ b/src/xtime.h @@ -0,0 +1,86 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2020-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * Date/time handling. + */ + +#ifndef BFS_XTIME_H +#define BFS_XTIME_H + +#include <time.h> + +/** + * localtime_r() wrapper that calls tzset() first. + * + * @param[in] timep + * The time_t to convert. + * @param[out] result + * Buffer to hold the result. + * @return + * 0 on success, -1 on failure. + */ +int xlocaltime(const time_t *timep, struct tm *result); + +/** + * gmtime_r() wrapper that calls tzset() first. + * + * @param[in] timep + * The time_t to convert. + * @param[out] result + * Buffer to hold the result. + * @return + * 0 on success, -1 on failure. + */ +int xgmtime(const time_t *timep, struct tm *result); + +/** + * mktime() wrapper that reports errors more reliably. + * + * @param[in,out] tm + * The struct tm to convert. + * @param[out] timep + * A pointer to the result. + * @return + * 0 on success, -1 on failure. + */ +int xmktime(struct tm *tm, time_t *timep); + +/** + * A portable timegm(), the inverse of gmtime(). + * + * @param[in,out] tm + * The struct tm to convert. + * @param[out] timep + * A pointer to the result. + * @return + * 0 on success, -1 on failure. + */ +int xtimegm(struct tm *tm, time_t *timep); + +/** + * Parse an ISO 8601-style timestamp. + * + * @param[in] str + * The string to parse. + * @param[out] result + * A pointer to the result. + * @return + * 0 on success, -1 on failure. + */ +int parse_timestamp(const char *str, struct timespec *result); + +#endif // BFS_XTIME_H |