/**************************************************************************** * bfs * * Copyright (C) 2015-2022 Tavian Barnes * * * * Permission to use, copy, modify, and/or distribute this software for any * * purpose with or without fee is hereby granted. * * * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * ****************************************************************************/ /** * The bftw() implementation consists of the following components: * * - struct bftw_file: A file that has been encountered during the traversal. * They have reference-counted links to their parents in the directory tree. * * - struct bftw_cache: An LRU list of bftw_file's with open file descriptors, * used for openat() to minimize the amount of path re-traversals. * * - struct bftw_queue: The queue of bftw_file's left to explore. Implemented * as a simple circular buffer. * * - struct bftw_state: Represents the current state of the traversal, allowing * various helper functions to take fewer parameters. */ #include "bftw.h" #include "dir.h" #include "darray.h" #include "dstring.h" #include "mtab.h" #include "stat.h" #include "trie.h" #include "util.h" #include #include #include #include #include #include #include #include /** * A file. */ struct bftw_file { /** The parent directory, if any. */ struct bftw_file *parent; /** The root under which this file was found. */ struct bftw_file *root; /** The next file in the queue, if any. */ struct bftw_file *next; /** The previous file in the LRU list. */ struct bftw_file *lru_prev; /** The next file in the LRU list. */ struct bftw_file *lru_next; /** This file's depth in the walk. */ size_t depth; /** Reference count. */ size_t refcount; /** An open descriptor to this file, or -1. */ int fd; /** This file's type, if known. */ enum bfs_type type; /** The device number, for cycle detection. */ dev_t dev; /** The inode number, for cycle detection. */ ino_t ino; /** The offset of this file in the full path. */ size_t nameoff; /** The length of the file's name. */ size_t namelen; /** The file's name. */ char name[]; }; /** * A cache of open directories. */ struct bftw_cache { /** The head of the LRU list. */ struct bftw_file *head; /** The insertion target for the LRU list. */ struct bftw_file *target; /** The tail of the LRU list. */ struct bftw_file *tail; /** The remaining capacity of the LRU list. */ size_t capacity; }; /** Initialize a cache. */ static void bftw_cache_init(struct bftw_cache *cache, size_t capacity) { cache->head = NULL; cache->target = NULL; cache->tail = NULL; cache->capacity = capacity; } /** Destroy a cache. */ static void bftw_cache_destroy(struct bftw_cache *cache) { assert(!cache->tail); assert(!cache->target); assert(!cache->head); } /** Add a bftw_file to the cache. */ static void bftw_cache_add(struct bftw_cache *cache, struct bftw_file *file) { assert(cache->capacity > 0); assert(file->fd >= 0); assert(!file->lru_prev); assert(!file->lru_next); if (cache->target) { file->lru_prev = cache->target; file->lru_next = cache->target->lru_next; } else { file->lru_next = cache->head; } if (file->lru_prev) { file->lru_prev->lru_next = file; } else { cache->head = file; } if (file->lru_next) { file->lru_next->lru_prev = file; } else { cache->tail = file; } // Prefer to keep the root paths open by keeping them at the head of the list if (file->depth == 0) { cache->target = file; } --cache->capacity; } /** Remove a bftw_file from the cache. */ static void bftw_cache_remove(struct bftw_cache *cache, struct bftw_file *file) { if (cache->target == file) { cache->target = file->lru_prev; } if (file->lru_prev) { assert(cache->head != file); file->lru_prev->lru_next = file->lru_next; } else { assert(cache->head == file); cache->head = file->lru_next; } if (file->lru_next) { assert(cache->tail != file); file->lru_next->lru_prev = file->lru_prev; } else { assert(cache->tail == file); cache->tail = file->lru_prev; } file->lru_prev = NULL; file->lru_next = NULL; ++cache->capacity; } /** Mark a cache entry as recently used. */ static void bftw_cache_use(struct bftw_cache *cache, struct bftw_file *file) { bftw_cache_remove(cache, file); bftw_cache_add(cache, file); } /** Close a bftw_file. */ static void bftw_file_close(struct bftw_cache *cache, struct bftw_file *file) { assert(file->fd >= 0); bftw_cache_remove(cache, file); xclose(file->fd); file->fd = -1; } /** Pop a directory from the cache. */ static void bftw_cache_pop(struct bftw_cache *cache) { assert(cache->tail); bftw_file_close(cache, cache->tail); } /** * Shrink the cache, to recover from EMFILE. * * @param cache * The cache in question. * @param saved * A bftw_file that must be preserved. * @return * 0 if successfully shrunk, otherwise -1. */ static int bftw_cache_shrink(struct bftw_cache *cache, const struct bftw_file *saved) { struct bftw_file *file = cache->tail; if (!file) { return -1; } if (file == saved) { file = file->lru_prev; if (!file) { return -1; } } bftw_file_close(cache, file); cache->capacity = 0; return 0; } /** Compute the name offset of a child path. */ static size_t bftw_child_nameoff(const struct bftw_file *parent) { size_t ret = parent->nameoff + parent->namelen; if (parent->name[parent->namelen - 1] != '/') { ++ret; } return ret; } /** Create a new bftw_file. */ static struct bftw_file *bftw_file_new(struct bftw_file *parent, const char *name) { size_t namelen = strlen(name); size_t size = BFS_FLEX_SIZEOF(struct bftw_file, name, namelen + 1); struct bftw_file *file = malloc(size); if (!file) { return NULL; } file->parent = parent; if (parent) { file->root = parent->root; file->depth = parent->depth + 1; file->nameoff = bftw_child_nameoff(parent); ++parent->refcount; } else { file->root = file; file->depth = 0; file->nameoff = 0; } file->next = NULL; file->lru_prev = NULL; file->lru_next = NULL; file->refcount = 1; file->fd = -1; file->type = BFS_UNKNOWN; file->dev = -1; file->ino = -1; file->namelen = namelen; memcpy(file->name, name, namelen + 1); return file; } /** * Open a bftw_file relative to another one. * * @param cache * The cache to hold the file. * @param file * The file to open. * @param base * The base directory for the relative path (may be NULL). * @param at_fd * The base file descriptor, AT_FDCWD if base == NULL. * @param at_path * The relative path to the file. * @return * The opened file descriptor, or negative on error. */ static int bftw_file_openat(struct bftw_cache *cache, struct bftw_file *file, struct bftw_file *base, const char *at_path) { assert(file->fd < 0); int at_fd = AT_FDCWD; if (base) { bftw_cache_use(cache, base); at_fd = base->fd; } int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY; int fd = openat(at_fd, at_path, flags); if (fd < 0 && errno == EMFILE) { if (bftw_cache_shrink(cache, base) == 0) { fd = openat(at_fd, at_path, flags); } } if (fd >= 0) { if (cache->capacity == 0) { bftw_cache_pop(cache); } file->fd = fd; bftw_cache_add(cache, file); } return fd; } /** * Open a bftw_file. * * @param cache * The cache to hold the file. * @param file * The file to open. * @param path * The full path to the file. * @return * The opened file descriptor, or negative on error. */ static int bftw_file_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { // Find the nearest open ancestor struct bftw_file *base = file; do { base = base->parent; } while (base && base->fd < 0); const char *at_path = path; if (base) { at_path += bftw_child_nameoff(base); } int fd = bftw_file_openat(cache, file, base, at_path); if (fd >= 0 || errno != ENAMETOOLONG) { return fd; } // Handle ENAMETOOLONG by manually traversing the path component-by-component // Use the ->next linked list to temporarily hold the reversed parent // chain between base and file struct bftw_file *cur; for (cur = file; cur->parent != base; cur = cur->parent) { cur->parent->next = cur; } // Open the files in the chain one by one for (base = cur; base; base = base->next) { fd = bftw_file_openat(cache, base, base->parent, base->name); if (fd < 0 || base == file) { break; } } // Clear out the linked list for (struct bftw_file *next = cur->next; cur != file; cur = next, next = next->next) { cur->next = NULL; } return fd; } /** * Open a bftw_file as a directory. * * @param cache * The cache to hold the file. * @param file * The directory to open. * @param path * The full path to the directory. * @return * The opened directory, or NULL on error. */ static struct bfs_dir *bftw_file_opendir(struct bftw_cache *cache, struct bftw_file *file, const char *path) { int fd = bftw_file_open(cache, file, path); if (fd < 0) { return NULL; } return bfs_opendir(fd, NULL); } /** Free a bftw_file. */ static void bftw_file_free(struct bftw_cache *cache, struct bftw_file *file) { assert(file->refcount == 0); if (file->fd >= 0) { bftw_file_close(cache, file); } free(file); } /** * A queue of bftw_file's to examine. */ struct bftw_queue { /** The head of the queue. */ struct bftw_file *head; /** The insertion target. */ struct bftw_file **target; }; /** Initialize a bftw_queue. */ static void bftw_queue_init(struct bftw_queue *queue) { queue->head = NULL; queue->target = &queue->head; } /** Add a file to a bftw_queue. */ static void bftw_queue_push(struct bftw_queue *queue, struct bftw_file *file) { assert(file->next == NULL); file->next = *queue->target; *queue->target = file; queue->target = &file->next; } /** Pop the next file from the head of the queue. */ static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) { struct bftw_file *file = queue->head; queue->head = file->next; file->next = NULL; if (queue->target == &file->next) { queue->target = &queue->head; } return file; } /** The split phase of mergesort. */ static struct bftw_file **bftw_sort_split(struct bftw_file **head, struct bftw_file **tail) { struct bftw_file **tortoise = head, **hare = head; while (*hare != *tail) { tortoise = &(*tortoise)->next; hare = &(*hare)->next; if (*hare != *tail) { hare = &(*hare)->next; } } return tortoise; } /** The merge phase of mergesort. */ static struct bftw_file **bftw_sort_merge(struct bftw_file **head, struct bftw_file **mid, struct bftw_file **tail) { struct bftw_file *left = *head, *right = *mid, *end = *tail; *mid = NULL; *tail = NULL; while (left || right) { struct bftw_file *next; if (left && (!right || strcoll(left->name, right->name) <= 0)) { next = left; left = left->next; } else { next = right; right = right->next; } *head = next; head = &next->next; } *head = end; return head; } /** * Sort a (sub-)list of files. * * @param head * The head of the (sub-)list to sort. * @param tail * The tail of the (sub-)list to sort. * @return * The new tail of the (sub-)list. */ static struct bftw_file **bftw_sort_files(struct bftw_file **head, struct bftw_file **tail) { struct bftw_file **mid = bftw_sort_split(head, tail); if (*mid == *head || *mid == *tail) { return tail; } mid = bftw_sort_files(head, mid); tail = bftw_sort_files(mid, tail); return bftw_sort_merge(head, mid, tail); } /** * Holds the current state of the bftw() traversal. */ struct bftw_state { /** bftw() callback. */ bftw_callback *callback; /** bftw() callback data. */ void *ptr; /** bftw() flags. */ enum bftw_flags flags; /** Search strategy. */ enum bftw_strategy strategy; /** The mount table. */ const struct bfs_mtab *mtab; /** The appropriate errno value, if any. */ int error; /** The cache of open directories. */ struct bftw_cache cache; /** The queue of directories left to explore. */ struct bftw_queue queue; /** The start of the current batch of files. */ struct bftw_file **batch; /** The current path. */ char *path; /** The current file. */ struct bftw_file *file; /** The previous file. */ struct bftw_file *previous; /** The currently open directory. */ struct bfs_dir *dir; /** The current directory entry. */ struct bfs_dirent *de; /** Storage for the directory entry. */ struct bfs_dirent de_storage; /** Any error encountered while reading the directory. */ int direrror; /** Extra data about the current file. */ struct BFTW ftwbuf; }; /** * Initialize the bftw() state. */ static int bftw_state_init(struct bftw_state *state, const struct bftw_args *args) { state->callback = args->callback; state->ptr = args->ptr; state->flags = args->flags; state->strategy = args->strategy; state->mtab = args->mtab; state->error = 0; if (args->nopenfd < 1) { errno = EMFILE; return -1; } state->path = dstralloc(0); if (!state->path) { return -1; } bftw_cache_init(&state->cache, args->nopenfd); bftw_queue_init(&state->queue); state->batch = NULL; state->file = NULL; state->previous = NULL; state->dir = NULL; state->de = NULL; state->direrror = 0; return 0; } /** Cached bfs_stat(). */ static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, struct bftw_stat *cache, enum bfs_stat_flags flags) { if (!cache->buf) { if (cache->error) { errno = cache->error; } else if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, &cache->storage) == 0) { cache->buf = &cache->storage; } else { cache->error = errno; } } return cache->buf; } const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { struct BFTW *mutbuf = (struct BFTW *)ftwbuf; const struct bfs_stat *ret; if (flags & BFS_STAT_NOFOLLOW) { ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); if (ret && !S_ISLNK(ret->mode) && !mutbuf->stat_cache.buf) { // Non-link, so share stat info mutbuf->stat_cache.buf = ret; } } else { ret = bftw_stat_impl(mutbuf, &mutbuf->stat_cache, BFS_STAT_FOLLOW); if (!ret && (flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(errno)) { ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); } } return ret; } const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { if (flags & BFS_STAT_NOFOLLOW) { return ftwbuf->lstat_cache.buf; } else if (ftwbuf->stat_cache.buf) { return ftwbuf->stat_cache.buf; } else if ((flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(ftwbuf->stat_cache.error)) { return ftwbuf->lstat_cache.buf; } else { return NULL; } } enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { if (flags & BFS_STAT_NOFOLLOW) { if (ftwbuf->type == BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { return ftwbuf->type; } } else if (flags & BFS_STAT_TRYFOLLOW) { if (ftwbuf->type != BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW)) { return ftwbuf->type; } } else { if (ftwbuf->type != BFS_LNK) { return ftwbuf->type; } else if (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW) { return BFS_ERROR; } } const struct bfs_stat *statbuf = bftw_stat(ftwbuf, flags); if (statbuf) { return bfs_mode_to_type(statbuf->mode); } else { return BFS_ERROR; } } /** * Update the path for the current file. */ static int bftw_update_path(struct bftw_state *state, const char *name) { const struct bftw_file *file = state->file; size_t length = file ? file->nameoff + file->namelen : 0; assert(dstrlen(state->path) >= length); dstresize(&state->path, length); if (name) { if (length > 0 && state->path[length - 1] != '/') { if (dstrapp(&state->path, '/') != 0) { return -1; } } if (dstrcat(&state->path, name) != 0) { return -1; } } return 0; } /** Check if a stat() call is needed for this visit. */ static bool bftw_need_stat(const struct bftw_state *state) { if (state->flags & BFTW_STAT) { return true; } const struct BFTW *ftwbuf = &state->ftwbuf; if (ftwbuf->type == BFS_UNKNOWN) { return true; } if (ftwbuf->type == BFS_LNK && !(ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { return true; } if (ftwbuf->type == BFS_DIR) { if (state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS)) { return true; } #if __linux__ } else if (state->mtab) { // Linux fills in d_type from the underlying inode, even when // the directory entry is a bind mount point. In that case, we // need to stat() to get the correct type. We don't need to // check for directories because they can only be mounted over // by other directories. if (bfs_might_be_mount(state->mtab, ftwbuf->path)) { return true; } #endif } return false; } /** Initialize bftw_stat cache. */ static void bftw_stat_init(struct bftw_stat *cache) { cache->buf = NULL; cache->error = 0; } /** * Open a file if necessary. * * @param file * The file to open. * @param path * The path to that file or one of its descendants. * @return * The opened file descriptor, or -1 on error. */ static int bftw_ensure_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { int ret = file->fd; if (ret < 0) { char *copy = strndup(path, file->nameoff + file->namelen); if (!copy) { return -1; } ret = bftw_file_open(cache, file, copy); free(copy); } return ret; } /** * Initialize the buffers with data about the current path. */ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { struct bftw_file *file = state->file; const struct bfs_dirent *de = state->de; struct BFTW *ftwbuf = &state->ftwbuf; ftwbuf->path = state->path; ftwbuf->root = file ? file->root->name : ftwbuf->path; ftwbuf->depth = 0; ftwbuf->visit = visit; ftwbuf->type = BFS_UNKNOWN; ftwbuf->error = state->direrror; ftwbuf->at_fd = AT_FDCWD; ftwbuf->at_path = ftwbuf->path; ftwbuf->stat_flags = BFS_STAT_NOFOLLOW; bftw_stat_init(&ftwbuf->lstat_cache); bftw_stat_init(&ftwbuf->stat_cache); struct bftw_file *parent = NULL; if (de) { parent = file; ftwbuf->depth = file->depth + 1; ftwbuf->type = de->type; ftwbuf->nameoff = bftw_child_nameoff(file); } else if (file) { parent = file->parent; ftwbuf->depth = file->depth; ftwbuf->type = file->type; ftwbuf->nameoff = file->nameoff; } if (parent) { // Try to ensure the immediate parent is open, to avoid ENAMETOOLONG if (bftw_ensure_open(&state->cache, parent, state->path) >= 0) { ftwbuf->at_fd = parent->fd; ftwbuf->at_path += ftwbuf->nameoff; } else { ftwbuf->error = errno; } } if (ftwbuf->depth == 0) { // Compute the name offset for root paths like "foo/bar" ftwbuf->nameoff = xbasename(ftwbuf->path) - ftwbuf->path; } if (ftwbuf->error != 0) { ftwbuf->type = BFS_ERROR; return; } int follow_flags = BFTW_FOLLOW_ALL; if (ftwbuf->depth == 0) { follow_flags |= BFTW_FOLLOW_ROOTS; } bool follow = state->flags & follow_flags; if (follow) { ftwbuf->stat_flags = BFS_STAT_TRYFOLLOW; } const struct bfs_stat *statbuf = NULL; if (bftw_need_stat(state)) { statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (statbuf) { ftwbuf->type = bfs_mode_to_type(statbuf->mode); } else { ftwbuf->type = BFS_ERROR; ftwbuf->error = errno; return; } } if (ftwbuf->type == BFS_DIR && (state->flags & BFTW_DETECT_CYCLES)) { for (const struct bftw_file *ancestor = parent; ancestor; ancestor = ancestor->parent) { if (ancestor->dev == statbuf->dev && ancestor->ino == statbuf->ino) { ftwbuf->type = BFS_ERROR; ftwbuf->error = ELOOP; return; } } } } /** Check if the current file is a mount point. */ static bool bftw_is_mount(struct bftw_state *state, const char *name) { const struct bftw_file *file = state->file; if (!file) { return false; } const struct bftw_file *parent = name ? file : file->parent; if (!parent) { return false; } const struct BFTW *ftwbuf = &state->ftwbuf; const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); return statbuf && statbuf->dev != parent->dev; } /** Fill file identity information from an ftwbuf. */ static void bftw_fill_id(struct bftw_file *file, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = ftwbuf->stat_cache.buf; if (!statbuf || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { statbuf = ftwbuf->lstat_cache.buf; } if (statbuf) { file->dev = statbuf->dev; file->ino = statbuf->ino; } } /** * Visit a path, invoking the callback. */ static enum bftw_action bftw_visit(struct bftw_state *state, const char *name, enum bftw_visit visit) { if (bftw_update_path(state, name) != 0) { state->error = errno; return BFTW_STOP; } const struct BFTW *ftwbuf = &state->ftwbuf; bftw_init_ftwbuf(state, visit); // Never give the callback BFS_ERROR unless BFTW_RECOVER is specified if (ftwbuf->type == BFS_ERROR && !(state->flags & BFTW_RECOVER)) { state->error = ftwbuf->error; return BFTW_STOP; } if ((state->flags & BFTW_SKIP_MOUNTS) && bftw_is_mount(state, name)) { return BFTW_PRUNE; } enum bftw_action ret = state->callback(ftwbuf, state->ptr); switch (ret) { case BFTW_CONTINUE: break; case BFTW_PRUNE: case BFTW_STOP: goto done; default: state->error = EINVAL; return BFTW_STOP; } if (visit != BFTW_PRE || ftwbuf->type != BFS_DIR) { ret = BFTW_PRUNE; goto done; } if ((state->flags & BFTW_PRUNE_MOUNTS) && bftw_is_mount(state, name)) { ret = BFTW_PRUNE; goto done; } done: if (state->file && !name) { bftw_fill_id(state->file, ftwbuf); } return ret; } /** * Push a new file onto the queue. */ static int bftw_push(struct bftw_state *state, const char *name, bool fill_id) { struct bftw_file *parent = state->file; struct bftw_file *file = bftw_file_new(parent, name); if (!file) { state->error = errno; return -1; } if (state->de) { file->type = state->de->type; } if (fill_id) { bftw_fill_id(file, &state->ftwbuf); } bftw_queue_push(&state->queue, file); return 0; } /** * Build the path to the current file. */ static int bftw_build_path(struct bftw_state *state) { const struct bftw_file *file = state->file; size_t pathlen = file->nameoff + file->namelen; if (dstresize(&state->path, pathlen) != 0) { state->error = errno; return -1; } // Try to find a common ancestor with the existing path const struct bftw_file *ancestor = state->previous; while (ancestor && ancestor->depth > file->depth) { ancestor = ancestor->parent; } // Build the path backwards while (file && file != ancestor) { if (file->nameoff > 0) { state->path[file->nameoff - 1] = '/'; } memcpy(state->path + file->nameoff, file->name, file->namelen); if (ancestor && ancestor->depth == file->depth) { ancestor = ancestor->parent; } file = file->parent; } state->previous = state->file; return 0; } /** * Pop the next file from the queue. */ static int bftw_pop(struct bftw_state *state) { if (!state->queue.head) { return 0; } state->file = bftw_queue_pop(&state->queue); if (bftw_build_path(state) != 0) { return -1; } return 1; } /** * Open the current directory. */ static void bftw_opendir(struct bftw_state *state) { assert(!state->dir); assert(!state->de); state->direrror = 0; state->dir = bftw_file_opendir(&state->cache, state->file, state->path); if (!state->dir) { state->direrror = errno; } } /** * Read an entry from the current directory. */ static int bftw_readdir(struct bftw_state *state) { if (!state->dir) { return -1; } int ret = bfs_readdir(state->dir, &state->de_storage); if (ret > 0) { state->de = &state->de_storage; } else if (ret == 0) { state->de = NULL; } else { state->de = NULL; state->direrror = errno; } return ret; } /** * Flags controlling which files get visited when done with a directory. */ enum bftw_gc_flags { /** Don't visit anything. */ BFTW_VISIT_NONE = 0, /** Visit the file itself. */ BFTW_VISIT_FILE = 1 << 0, /** Visit the file's ancestors. */ BFTW_VISIT_PARENTS = 1 << 1, /** Visit both the file and its ancestors. */ BFTW_VISIT_ALL = BFTW_VISIT_FILE | BFTW_VISIT_PARENTS, }; /** * Close the current directory. */ static enum bftw_action bftw_closedir(struct bftw_state *state, enum bftw_gc_flags flags) { struct bftw_file *file = state->file; enum bftw_action ret = BFTW_CONTINUE; if (state->dir) { assert(file->fd >= 0); if (file->refcount > 1) { // Keep the fd around if any subdirectories exist file->fd = bfs_freedir(state->dir); } else { bfs_closedir(state->dir); file->fd = -1; } if (file->fd < 0) { bftw_cache_remove(&state->cache, file); } } state->de = NULL; state->dir = NULL; if (state->direrror != 0) { if (flags & BFTW_VISIT_FILE) { ret = bftw_visit(state, NULL, BFTW_PRE); } else { state->error = state->direrror; } state->direrror = 0; } return ret; } /** * Finalize and free a file we're done with. */ static enum bftw_action bftw_gc_file(struct bftw_state *state, enum bftw_gc_flags flags) { enum bftw_action ret = BFTW_CONTINUE; if (!(state->flags & BFTW_POST_ORDER)) { flags = 0; } bool visit = flags & BFTW_VISIT_FILE; while (state->file) { struct bftw_file *file = state->file; if (--file->refcount > 0) { state->file = NULL; break; } if (visit && bftw_visit(state, NULL, BFTW_POST) == BFTW_STOP) { ret = BFTW_STOP; flags &= ~BFTW_VISIT_PARENTS; } visit = flags & BFTW_VISIT_PARENTS; struct bftw_file *parent = file->parent; if (state->previous == file) { state->previous = parent; } bftw_file_free(&state->cache, file); state->file = parent; } return ret; } /** * Drain all the entries from a bftw_queue. */ static void bftw_drain_queue(struct bftw_state *state, struct bftw_queue *queue) { while (queue->head) { state->file = bftw_queue_pop(queue); bftw_gc_file(state, BFTW_VISIT_NONE); } } /** * Dispose of the bftw() state. * * @return * The bftw() return value. */ static int bftw_state_destroy(struct bftw_state *state) { dstrfree(state->path); bftw_closedir(state, BFTW_VISIT_NONE); bftw_gc_file(state, BFTW_VISIT_NONE); bftw_drain_queue(state, &state->queue); bftw_cache_destroy(&state->cache); errno = state->error; return state->error ? -1 : 0; } /** Start a batch of files. */ static void bftw_batch_start(struct bftw_state *state) { if (state->strategy == BFTW_DFS) { state->queue.target = &state->queue.head; } state->batch = state->queue.target; } /** Finish adding a batch of files. */ static void bftw_batch_finish(struct bftw_state *state) { if (state->flags & BFTW_SORT) { state->queue.target = bftw_sort_files(state->batch, state->queue.target); } } /** * Streaming mode: visit files as they are encountered. */ static int bftw_stream(const struct bftw_args *args) { struct bftw_state state; if (bftw_state_init(&state, args) != 0) { return -1; } assert(!(state.flags & (BFTW_SORT | BFTW_BUFFER))); bftw_batch_start(&state); for (size_t i = 0; i < args->npaths; ++i) { const char *path = args->paths[i]; switch (bftw_visit(&state, path, BFTW_PRE)) { case BFTW_CONTINUE: break; case BFTW_PRUNE: continue; case BFTW_STOP: goto done; } if (bftw_push(&state, path, true) != 0) { goto done; } } bftw_batch_finish(&state); while (bftw_pop(&state) > 0) { bftw_opendir(&state); bftw_batch_start(&state); while (bftw_readdir(&state) > 0) { const char *name = state.de->name; switch (bftw_visit(&state, name, BFTW_PRE)) { case BFTW_CONTINUE: break; case BFTW_PRUNE: continue; case BFTW_STOP: goto done; } if (bftw_push(&state, name, true) != 0) { goto done; } } bftw_batch_finish(&state); if (bftw_closedir(&state, BFTW_VISIT_ALL) == BFTW_STOP) { goto done; } if (bftw_gc_file(&state, BFTW_VISIT_ALL) == BFTW_STOP) { goto done; } } done: return bftw_state_destroy(&state); } /** * Batching mode: queue up all children before visiting them. */ static int bftw_batch(const struct bftw_args *args) { struct bftw_state state; if (bftw_state_init(&state, args) != 0) { return -1; } bftw_batch_start(&state); for (size_t i = 0; i < args->npaths; ++i) { if (bftw_push(&state, args->paths[i], false) != 0) { goto done; } } bftw_batch_finish(&state); while (bftw_pop(&state) > 0) { enum bftw_gc_flags gcflags = BFTW_VISIT_ALL; switch (bftw_visit(&state, NULL, BFTW_PRE)) { case BFTW_CONTINUE: break; case BFTW_PRUNE: gcflags &= ~BFTW_VISIT_FILE; goto next; case BFTW_STOP: goto done; } bftw_opendir(&state); bftw_batch_start(&state); while (bftw_readdir(&state) > 0) { if (bftw_push(&state, state.de->name, false) != 0) { goto done; } } bftw_batch_finish(&state); if (bftw_closedir(&state, gcflags) == BFTW_STOP) { goto done; } next: if (bftw_gc_file(&state, gcflags) == BFTW_STOP) { goto done; } } done: return bftw_state_destroy(&state); } /** Select bftw_stream() or bftw_batch() appropriately. */ static int bftw_auto(const struct bftw_args *args) { if (args->flags & (BFTW_SORT | BFTW_BUFFER)) { return bftw_batch(args); } else { return bftw_stream(args); } } /** * Iterative deepening search state. */ struct bftw_ids_state { /** The wrapped callback. */ bftw_callback *delegate; /** The wrapped callback arguments. */ void *ptr; /** Which visit this search corresponds to. */ enum bftw_visit visit; /** Whether to override the bftw_visit. */ bool force_visit; /** The current minimum depth (inclusive). */ size_t min_depth; /** The current maximum depth (exclusive). */ size_t max_depth; /** The set of pruned paths. */ struct trie pruned; /** An error code to report. */ int error; /** Whether the bottom has been found. */ bool bottom; /** Whether to quit the search. */ bool quit; }; /** Iterative deepening callback function. */ static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr) { struct bftw_ids_state *state = ptr; if (state->force_visit) { struct BFTW *mutbuf = (struct BFTW *)ftwbuf; mutbuf->visit = state->visit; } if (ftwbuf->type == BFS_ERROR) { if (ftwbuf->depth + 1 >= state->min_depth) { return state->delegate(ftwbuf, state->ptr); } else { return BFTW_PRUNE; } } if (ftwbuf->depth < state->min_depth) { if (trie_find_str(&state->pruned, ftwbuf->path)) { return BFTW_PRUNE; } else { return BFTW_CONTINUE; } } else if (state->visit == BFTW_POST) { if (trie_find_str(&state->pruned, ftwbuf->path)) { return BFTW_PRUNE; } } enum bftw_action ret = BFTW_CONTINUE; if (ftwbuf->visit == state->visit) { ret = state->delegate(ftwbuf, state->ptr); } switch (ret) { case BFTW_CONTINUE: if (ftwbuf->type == BFS_DIR && ftwbuf->depth + 1 >= state->max_depth) { state->bottom = false; ret = BFTW_PRUNE; } break; case BFTW_PRUNE: if (ftwbuf->type == BFS_DIR) { if (!trie_insert_str(&state->pruned, ftwbuf->path)) { state->error = errno; state->quit = true; ret = BFTW_STOP; } } break; case BFTW_STOP: state->quit = true; break; } return ret; } /** Initialize iterative deepening state. */ static void bftw_ids_init(const struct bftw_args *args, struct bftw_ids_state *state, struct bftw_args *ids_args) { state->delegate = args->callback; state->ptr = args->ptr; state->visit = BFTW_PRE; state->force_visit = false; state->min_depth = 0; state->max_depth = 1; trie_init(&state->pruned); state->error = 0; state->bottom = false; state->quit = false; *ids_args = *args; ids_args->callback = bftw_ids_callback; ids_args->ptr = state; ids_args->flags &= ~BFTW_POST_ORDER; ids_args->strategy = BFTW_DFS; } /** Finish an iterative deepening search. */ static int bftw_ids_finish(struct bftw_ids_state *state) { int ret = 0; if (state->error) { ret = -1; } else { state->error = errno; } trie_destroy(&state->pruned); errno = state->error; return ret; } /** * Iterative deepening bftw() wrapper. */ static int bftw_ids(const struct bftw_args *args) { struct bftw_ids_state state; struct bftw_args ids_args; bftw_ids_init(args, &state, &ids_args); while (!state.quit && !state.bottom) { state.bottom = true; if (bftw_auto(&ids_args) != 0) { state.error = errno; state.quit = true; } ++state.min_depth; ++state.max_depth; } if (args->flags & BFTW_POST_ORDER) { state.visit = BFTW_POST; state.force_visit = true; while (!state.quit && state.min_depth > 0) { --state.max_depth; --state.min_depth; if (bftw_auto(&ids_args) != 0) { state.error = errno; state.quit = true; } } } return bftw_ids_finish(&state); } /** * Exponential deepening bftw() wrapper. */ static int bftw_eds(const struct bftw_args *args) { struct bftw_ids_state state; struct bftw_args ids_args; bftw_ids_init(args, &state, &ids_args); while (!state.quit && !state.bottom) { state.bottom = true; if (bftw_auto(&ids_args) != 0) { state.error = errno; state.quit = true; } state.min_depth = state.max_depth; state.max_depth *= 2; } if (!state.quit && (args->flags & BFTW_POST_ORDER)) { state.visit = BFTW_POST; state.min_depth = 0; ids_args.flags |= BFTW_POST_ORDER; if (bftw_auto(&ids_args) != 0) { state.error = errno; } } return bftw_ids_finish(&state); } int bftw(const struct bftw_args *args) { switch (args->strategy) { case BFTW_BFS: return bftw_auto(args); case BFTW_DFS: return bftw_batch(args); case BFTW_IDS: return bftw_ids(args); case BFTW_EDS: return bftw_eds(args); } errno = EINVAL; return -1; }