From 72552f880f3ca52c0d98d875b1da783e5a2fa2e7 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Sun, 14 Jun 2015 18:41:16 -0400 Subject: Implement bftw(). --- .gitignore | 2 + COPYING | 13 ++ Makefile | 28 ++++ bfs.c | 34 +++++ bftw.c | 491 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ bftw.h | 62 ++++++++ 6 files changed, 630 insertions(+) create mode 100644 .gitignore create mode 100644 COPYING create mode 100644 Makefile create mode 100644 bfs.c create mode 100644 bftw.c create mode 100644 bftw.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8e8b350 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.o +/bfs diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..c6c7def --- /dev/null +++ b/COPYING @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2004 Sam Hocevar + + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ac6b2e7 --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +##################################################################### +# bfs # +# Copyright (C) 2015 Tavian Barnes # +# # +# This program is free software. It comes without any warranty, to # +# the extent permitted by applicable law. You can redistribute it # +# and/or modify it under the terms of the Do What The Fuck You Want # +# To Public License, Version 2, as published by Sam Hocevar. See # +# the COPYING file or http://www.wtfpl.net/ for more details. # +##################################################################### + +CC := gcc +CFLAGS := -std=c99 -g -Og -Wall -D_DEFAULT_SOURCE +LDFLAGS := +RM := rm -f + +DEPS := bftw.h + +bfs: bfs.o bftw.o + $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ + +%.o: %.c $(DEPS) + $(CC) $(CFLAGS) -c $< -o $@ + +clean: + $(RM) bfs *.o + +.PHONY: clean diff --git a/bfs.c b/bfs.c new file mode 100644 index 0000000..e4296e6 --- /dev/null +++ b/bfs.c @@ -0,0 +1,34 @@ +/********************************************************************* + * bfs * + * Copyright (C) 2015 Tavian Barnes * + * * + * This program is free software. It comes without any warranty, to * + * the extent permitted by applicable law. You can redistribute it * + * and/or modify it under the terms of the Do What The Fuck You Want * + * To Public License, Version 2, as published by Sam Hocevar. See * + * the COPYING file or http://www.wtfpl.net/ for more details. * + *********************************************************************/ + +#include "bftw.h" +#include +#include + +static int callback(const char *fpath, int typeflag, void *ptr) { + printf("%s\n", fpath); + return BFTW_CONTINUE; +} + +int main(int argc, char* argv[]) { + const char* path = "."; + if (argc > 1) { + path = argv[1]; + } + + // TODO: getrlimit(RLIMIT_NOFILE) + if (bftw(path, callback, 1024, NULL) != 0) { + perror("bftw()"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/bftw.c b/bftw.c new file mode 100644 index 0000000..e8228c6 --- /dev/null +++ b/bftw.c @@ -0,0 +1,491 @@ +/********************************************************************* + * bfs * + * Copyright (C) 2015 Tavian Barnes * + * * + * This program is free software. It comes without any warranty, to * + * the extent permitted by applicable law. You can redistribute it * + * and/or modify it under the terms of the Do What The Fuck You Want * + * To Public License, Version 2, as published by Sam Hocevar. See * + * the COPYING file or http://www.wtfpl.net/ for more details. * + *********************************************************************/ + +/** + * bftw() implementation. + * + * The goal of this implementation is to avoid re-traversal by using openat() as + * much as possible. The 'dircache' attempts to accomplish this by storing a + * hierarchy of 'dircache_entry's, along with an LRU list of recently accessed + * entries. Every entry in the LRU list has an open DIR *; to open an entry, we + * traverse its chain of parents, hoping to find an open one. The size of the + * LRU list is limited, because so are the available file descriptors. + * + * The 'dirqueue' is a simple FIFO of 'dircache_entry's left to explore. + */ + +#include "bftw.h" +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * Simple dynamically-sized string type. + */ +typedef struct { + char *str; + size_t size; +} dynstr; + +/** Initialize a dynstr. */ +static void dynstr_init(dynstr *dstr) { + dstr->str = NULL; + dstr->size = 0; +} + +/** Grow a dynstr to the gizen size if necessary. */ +static int dynstr_grow(dynstr *dstr, size_t size) { + if (size > dstr->size) { + size_t new_size = 3*(size + 1)/2; + char *new_str = realloc(dstr->str, new_size); + if (!new_str) { + return -1; + } + + dstr->str = new_str; + dstr->size = new_size; + } + + return 0; +} + +/** Concatenate a string to a dynstr at the given position. */ +static int dynstr_concat(dynstr *dstr, size_t pos, const char *more) { + size_t morelen = strlen(more); + size_t needed = pos + morelen + 1; + if (dynstr_grow(dstr, needed) != 0) { + return -1; + } + + memcpy(dstr->str + pos, more, morelen + 1); + return 0; +} + +/** Free a dynstr. */ +static void dynstr_free(dynstr *dstr) { + free(dstr->str); +} + +/** + * A single entry in the dircache. + */ +typedef struct dircache_entry dircache_entry; + +struct dircache_entry { + /** The parent entry, if any. */ + dircache_entry *parent; + + /** Previous node in the LRU list. */ + dircache_entry *lru_prev; + /** Next node in the LRU list. */ + dircache_entry *lru_next; + + /** The DIR pointer, if open. */ + DIR *dir; + + /** Reference count. */ + size_t refcount; + + /** The directory's name. */ + char name[]; +}; + +/** + * A directory cache. + */ +typedef struct { + /** Most recently used entry. */ + dircache_entry *lru_head; + /** Least recently used entry. */ + dircache_entry *lru_tail; + /** Remaining LRU list capacity. */ + size_t lru_remaining; +} dircache; + +/** Initialize a dircache. */ +static void dircache_init(dircache *cache, size_t lru_size) { + assert(lru_size > 0); + cache->lru_head = cache->lru_tail = NULL; + cache->lru_remaining = lru_size; +} + +/** Add an entry to the dircache. */ +static dircache_entry *dircache_add(dircache *cache, dircache_entry *parent, const char *path) { + size_t pathsize = strlen(path) + 1; + dircache_entry *entry = malloc(sizeof(dircache_entry) + pathsize); + if (entry) { + entry->parent = parent; + entry->lru_prev = entry->lru_next = NULL; + entry->dir = NULL; + entry->refcount = 1; + memcpy(entry->name, path, pathsize); + + while (parent) { + ++parent->refcount; + parent = parent->parent; + } + } + return entry; +} + +/** Add an entry to the head of the LRU list. */ +static void dircache_lru_add(dircache *cache, dircache_entry *entry) { + assert(entry->dir); + assert(entry->lru_prev == NULL); + assert(entry->lru_next == NULL); + + entry->lru_next = cache->lru_head; + cache->lru_head = entry; + + if (entry->lru_next) { + entry->lru_next->lru_prev = entry; + } + + if (!cache->lru_tail) { + cache->lru_tail = entry; + } + + --cache->lru_remaining; +} + +/** Remove an entry from the LRU list. */ +static void dircache_lru_remove(dircache *cache, dircache_entry *entry) { + if (entry->lru_prev) { + assert(cache->lru_head != entry); + entry->lru_prev->lru_next = entry->lru_next; + } else { + assert(cache->lru_head == entry); + cache->lru_head = entry->lru_next; + } + + if (entry->lru_next) { + assert(cache->lru_tail != entry); + entry->lru_next->lru_prev = entry->lru_prev; + } else { + assert(cache->lru_tail == entry); + cache->lru_tail = entry->lru_prev; + } + + entry->lru_prev = entry->lru_next = NULL; + + ++cache->lru_remaining; +} + +/** Close a dircache_entry and remove it from the LRU list. */ +static void dircache_entry_close(dircache *cache, dircache_entry *entry) { + dircache_lru_remove(cache, entry); + closedir(entry->dir); + entry->dir = NULL; +} + +/** POSIX doesn't have this?! */ +static DIR *opendirat(int fd, const char *name) { + int dfd = openat(fd, name, O_DIRECTORY); + if (dfd < 0) { + return NULL; + } + + return fdopendir(dfd); +} + +/** + * Open a dircache_entry. + * + * @param cache + * The cache containing the entry. + * @param entry + * The entry to open. + * @param[out] path + * Will hold the full path to the entry, with a trailing '/'. + * @return + * The opened DIR *, or NULL on error. + */ +static DIR *dircache_entry_open(dircache *cache, dircache_entry *entry, dynstr *path) { + assert(!entry->dir); + + if (cache->lru_remaining == 0) { + dircache_entry_close(cache, cache->lru_tail); + } + + // First, reserve enough space for the path + size_t pathsize = 1; // Terminating '\0' + + dircache_entry *parent = entry; + do { + pathsize += strlen(parent->name) + 1; // name + '/' + parent = parent->parent; + } while (parent); + + if (dynstr_grow(path, pathsize) != 0) { + return NULL; + } + + // Now, build the path backwards while looking for a parent + char *segment = path->str + pathsize - 1; + *segment = '\0'; + + int fd = AT_FDCWD; + const char *relpath = path->str; + + parent = entry; + while (1) { + size_t namelen = strlen(parent->name); + segment -= namelen + 1; + memcpy(segment, parent->name, namelen); + segment[namelen] = '/'; + + parent = parent->parent; + if (!parent) { + break; + } + + if (parent->dir && fd == AT_FDCWD) { + dircache_lru_remove(cache, parent); + dircache_lru_add(cache, parent); + fd = dirfd(parent->dir); + relpath = segment; + } + } + + DIR *dir = opendirat(fd, relpath); + if (dir) { + entry->dir = dir; + dircache_lru_add(cache, entry); + } + + return dir; +} + +/** Free a dircache_entry. */ +static void dircache_entry_free(dircache *cache, dircache_entry *entry) { + while (entry) { + dircache_entry *saved = entry; + entry = entry->parent; + + if (--saved->refcount == 0) { + if (saved->dir) { + dircache_entry_close(cache, saved); + } + free(saved); + } + } +} + +/** The size of a dirqueue block. */ +#define DIRQUEUE_BLOCK_SIZE 1023 + +/** + * A single block in the dirqueue chain. + */ +typedef struct dirqueue_block dirqueue_block; + +struct dirqueue_block { + /** The next block in the chain. */ + dirqueue_block *next; + /** The elements in the queue. */ + dircache_entry *entries[DIRQUEUE_BLOCK_SIZE]; +}; + +/** + * A queue of 'dircache_entry's to examine. + */ +typedef struct { + /** The first block. */ + dirqueue_block *head; + /** The last block. */ + dirqueue_block *tail; + /** The index in 'head' of the next entry to read. */ + size_t front; + /** The index in 'tail' of the next entry to write. */ + size_t back; +} dirqueue; + +/** Initialize a dirqueue. */ +static void dirqueue_init(dirqueue *queue) { + queue->head = queue->tail = NULL; + queue->front = 0; + queue->back = DIRQUEUE_BLOCK_SIZE; +} + +/** Add an entry to the dirqueue. */ +static int dirqueue_push(dirqueue *queue, dircache_entry *entry) { + if (queue->back == DIRQUEUE_BLOCK_SIZE) { + dirqueue_block *block = malloc(sizeof(dirqueue_block)); + if (!block) { + return -1; + } + + block->next = NULL; + + if (queue->tail) { + queue->tail->next = block; + } + queue->tail = block; + + if (!queue->head) { + queue->head = block; + } + + queue->back = 0; + } + + queue->tail->entries[queue->back++] = entry; + return 0; +} + +/** Remove an entry from the dirqueue. */ +static dircache_entry *dirqueue_pop(dirqueue *queue) { + if (!queue->head) { + return NULL; + } + + if (queue->head == queue->tail && queue->front == queue->back) { + free(queue->head); + dirqueue_init(queue); + return NULL; + } + + dirqueue_block *head = queue->head; + dircache_entry *entry = head->entries[queue->front]; + if (++queue->front == DIRQUEUE_BLOCK_SIZE) { + queue->head = head->next; + queue->front = 0; + free(head); + } + return entry; +} + +int bftw(const char *dirpath, bftw_fn *fn, int nopenfd, void *ptr) { + int ret = -1, err; + + dircache cache; + dircache_init(&cache, nopenfd); + + dirqueue queue; + dirqueue_init(&queue); + + dynstr path; + dynstr_init(&path); + + dircache_entry *current = dircache_add(&cache, NULL, dirpath); + if (!current) { + goto done; + } + + do { + DIR *dir = dircache_entry_open(&cache, current, &path); + if (!dir) { + goto done; + } + + size_t pathlen = strlen(path.str); + + struct dirent *de; + while ((de = readdir(dir)) != NULL) { + if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) { + continue; + } + + if (dynstr_concat(&path, pathlen, de->d_name) != 0) { + goto done; + } + + int typeflag = BFTW_UNKNOWN; + +#ifdef _DIRENT_HAVE_D_TYPE + switch (de->d_type) { + case DT_DIR: + typeflag = BFTW_D; + break; + case DT_REG: + typeflag = BFTW_R; + break; + case DT_LNK: + typeflag = BFTW_SL; + break; + } +#endif + + if (typeflag == BFTW_UNKNOWN) { + struct stat sb; + if (fstatat(dirfd(dir), de->d_name, &sb, AT_SYMLINK_NOFOLLOW) == 0) { + switch (sb.st_mode & S_IFMT) { + case S_IFDIR: + typeflag = BFTW_D; + break; + case S_IFREG: + typeflag = BFTW_R; + break; + case S_IFLNK: + typeflag = BFTW_SL; + break; + } + } + } + + int action = fn(path.str, typeflag, ptr); + + switch (action) { + case BFTW_CONTINUE: + if (typeflag != BFTW_D) { + break; + } + + dircache_entry *next = dircache_add(&cache, current, de->d_name); + if (!next) { + goto done; + } + + if (dirqueue_push(&queue, next) != 0) { + goto done; + } + break; + + case BFTW_SKIP_SIBLINGS: + goto next; + + case BFTW_SKIP_SUBTREE: + break; + + case BFTW_STOP: + ret = 0; + goto done; + + default: + errno = EINVAL; + goto done; + } + } + + next: + dircache_entry_free(&cache, current); + current = dirqueue_pop(&queue); + } while (current); + + ret = 0; +done: + err = errno; + + while (current) { + dircache_entry_free(&cache, current); + current = dirqueue_pop(&queue); + } + + dynstr_free(&path); + + errno = err; + return ret; +} diff --git a/bftw.h b/bftw.h new file mode 100644 index 0000000..be538a8 --- /dev/null +++ b/bftw.h @@ -0,0 +1,62 @@ +/********************************************************************* + * bfs * + * Copyright (C) 2015 Tavian Barnes * + * * + * This program is free software. It comes without any warranty, to * + * the extent permitted by applicable law. You can redistribute it * + * and/or modify it under the terms of the Do What The Fuck You Want * + * To Public License, Version 2, as published by Sam Hocevar. See * + * the COPYING file or http://www.wtfpl.net/ for more details. * + *********************************************************************/ + +/** + * Callback function type for bftw(). + * + * @param fpath + * The path to the encountered file. + * @param typeflag + * A typeflag value (see below). + * @param ptr + * The pointer passed to bftw(). + * @return + * An action value (see below). + */ +typedef int bftw_fn(const char *fpath, int typeflag, void *ptr); + +/** + * Breadth First Tree Walk (or Better File Tree Walk). + * + * Like ftw(3) and nftw(3), this function walks a directory tree recursively, + * and invokes a callback for each path it encounters. However, bftw() operates + * breadth-first. + * + * @param dirpath + * The starting path. + * @param fn + * The callback to invoke. + * @param nopenfd + * The maximum number of file descriptors to keep open. + * @param ptr + * A generic pointer which is passed to fn(). + * @return + * 0 on success, or -1 on failure. + */ +int bftw(const char *dirpath, bftw_fn *fn, int nopenfd, void *ptr); + +/** typeflag: Directory. */ +#define BFTW_D 0 +/** typeflag: Regular file. */ +#define BFTW_R 1 +/** typeflag: Symbolic link. */ +#define BFTW_SL 2 +/** typeflag: Unknown type. */ +#define BFTW_UNKNOWN 3 + +/** action: Keep walking. */ +#define BFTW_CONTINUE 0 +/** action: Skip this path's siblings. */ +#define BFTW_SKIP_SIBLINGS 1 +/** action: Skip this path's children. */ +#define BFTW_SKIP_SUBTREE 2 +/** action: Stop walking. */ +#define BFTW_STOP 3 -- cgit v1.2.3