summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--COPYING13
-rw-r--r--Makefile28
-rw-r--r--bfs.c34
-rw-r--r--bftw.c491
-rw-r--r--bftw.h62
6 files changed, 630 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8e8b350
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+/bfs
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..c6c7def
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,13 @@
+ DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+ Version 2, December 2004
+
+ Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
+
+ Everyone is permitted to copy and distribute verbatim or modified
+ copies of this license document, and changing it is allowed as long
+ as the name is changed.
+
+ DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. You just DO WHAT THE FUCK YOU WANT TO.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ac6b2e7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,28 @@
+#####################################################################
+# bfs #
+# Copyright (C) 2015 Tavian Barnes <tavianator@tavianator.com> #
+# #
+# This program is free software. It comes without any warranty, to #
+# the extent permitted by applicable law. You can redistribute it #
+# and/or modify it under the terms of the Do What The Fuck You Want #
+# To Public License, Version 2, as published by Sam Hocevar. See #
+# the COPYING file or http://www.wtfpl.net/ for more details. #
+#####################################################################
+
+CC := gcc
+CFLAGS := -std=c99 -g -Og -Wall -D_DEFAULT_SOURCE
+LDFLAGS :=
+RM := rm -f
+
+DEPS := bftw.h
+
+bfs: bfs.o bftw.o
+ $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@
+
+%.o: %.c $(DEPS)
+ $(CC) $(CFLAGS) -c $< -o $@
+
+clean:
+ $(RM) bfs *.o
+
+.PHONY: clean
diff --git a/bfs.c b/bfs.c
new file mode 100644
index 0000000..e4296e6
--- /dev/null
+++ b/bfs.c
@@ -0,0 +1,34 @@
+/*********************************************************************
+ * bfs *
+ * Copyright (C) 2015 Tavian Barnes <tavianator@tavianator.com> *
+ * *
+ * This program is free software. It comes without any warranty, to *
+ * the extent permitted by applicable law. You can redistribute it *
+ * and/or modify it under the terms of the Do What The Fuck You Want *
+ * To Public License, Version 2, as published by Sam Hocevar. See *
+ * the COPYING file or http://www.wtfpl.net/ for more details. *
+ *********************************************************************/
+
+#include "bftw.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+static int callback(const char *fpath, int typeflag, void *ptr) {
+ printf("%s\n", fpath);
+ return BFTW_CONTINUE;
+}
+
+int main(int argc, char* argv[]) {
+ const char* path = ".";
+ if (argc > 1) {
+ path = argv[1];
+ }
+
+ // TODO: getrlimit(RLIMIT_NOFILE)
+ if (bftw(path, callback, 1024, NULL) != 0) {
+ perror("bftw()");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/bftw.c b/bftw.c
new file mode 100644
index 0000000..e8228c6
--- /dev/null
+++ b/bftw.c
@@ -0,0 +1,491 @@
+/*********************************************************************
+ * bfs *
+ * Copyright (C) 2015 Tavian Barnes <tavianator@tavianator.com> *
+ * *
+ * This program is free software. It comes without any warranty, to *
+ * the extent permitted by applicable law. You can redistribute it *
+ * and/or modify it under the terms of the Do What The Fuck You Want *
+ * To Public License, Version 2, as published by Sam Hocevar. See *
+ * the COPYING file or http://www.wtfpl.net/ for more details. *
+ *********************************************************************/
+
+/**
+ * bftw() implementation.
+ *
+ * The goal of this implementation is to avoid re-traversal by using openat() as
+ * much as possible. The 'dircache' attempts to accomplish this by storing a
+ * hierarchy of 'dircache_entry's, along with an LRU list of recently accessed
+ * entries. Every entry in the LRU list has an open DIR *; to open an entry, we
+ * traverse its chain of parents, hoping to find an open one. The size of the
+ * LRU list is limited, because so are the available file descriptors.
+ *
+ * The 'dirqueue' is a simple FIFO of 'dircache_entry's left to explore.
+ */
+
+#include "bftw.h"
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+/**
+ * Simple dynamically-sized string type.
+ */
+typedef struct {
+ char *str;
+ size_t size;
+} dynstr;
+
+/** Initialize a dynstr. */
+static void dynstr_init(dynstr *dstr) {
+ dstr->str = NULL;
+ dstr->size = 0;
+}
+
+/** Grow a dynstr to the gizen size if necessary. */
+static int dynstr_grow(dynstr *dstr, size_t size) {
+ if (size > dstr->size) {
+ size_t new_size = 3*(size + 1)/2;
+ char *new_str = realloc(dstr->str, new_size);
+ if (!new_str) {
+ return -1;
+ }
+
+ dstr->str = new_str;
+ dstr->size = new_size;
+ }
+
+ return 0;
+}
+
+/** Concatenate a string to a dynstr at the given position. */
+static int dynstr_concat(dynstr *dstr, size_t pos, const char *more) {
+ size_t morelen = strlen(more);
+ size_t needed = pos + morelen + 1;
+ if (dynstr_grow(dstr, needed) != 0) {
+ return -1;
+ }
+
+ memcpy(dstr->str + pos, more, morelen + 1);
+ return 0;
+}
+
+/** Free a dynstr. */
+static void dynstr_free(dynstr *dstr) {
+ free(dstr->str);
+}
+
+/**
+ * A single entry in the dircache.
+ */
+typedef struct dircache_entry dircache_entry;
+
+struct dircache_entry {
+ /** The parent entry, if any. */
+ dircache_entry *parent;
+
+ /** Previous node in the LRU list. */
+ dircache_entry *lru_prev;
+ /** Next node in the LRU list. */
+ dircache_entry *lru_next;
+
+ /** The DIR pointer, if open. */
+ DIR *dir;
+
+ /** Reference count. */
+ size_t refcount;
+
+ /** The directory's name. */
+ char name[];
+};
+
+/**
+ * A directory cache.
+ */
+typedef struct {
+ /** Most recently used entry. */
+ dircache_entry *lru_head;
+ /** Least recently used entry. */
+ dircache_entry *lru_tail;
+ /** Remaining LRU list capacity. */
+ size_t lru_remaining;
+} dircache;
+
+/** Initialize a dircache. */
+static void dircache_init(dircache *cache, size_t lru_size) {
+ assert(lru_size > 0);
+ cache->lru_head = cache->lru_tail = NULL;
+ cache->lru_remaining = lru_size;
+}
+
+/** Add an entry to the dircache. */
+static dircache_entry *dircache_add(dircache *cache, dircache_entry *parent, const char *path) {
+ size_t pathsize = strlen(path) + 1;
+ dircache_entry *entry = malloc(sizeof(dircache_entry) + pathsize);
+ if (entry) {
+ entry->parent = parent;
+ entry->lru_prev = entry->lru_next = NULL;
+ entry->dir = NULL;
+ entry->refcount = 1;
+ memcpy(entry->name, path, pathsize);
+
+ while (parent) {
+ ++parent->refcount;
+ parent = parent->parent;
+ }
+ }
+ return entry;
+}
+
+/** Add an entry to the head of the LRU list. */
+static void dircache_lru_add(dircache *cache, dircache_entry *entry) {
+ assert(entry->dir);
+ assert(entry->lru_prev == NULL);
+ assert(entry->lru_next == NULL);
+
+ entry->lru_next = cache->lru_head;
+ cache->lru_head = entry;
+
+ if (entry->lru_next) {
+ entry->lru_next->lru_prev = entry;
+ }
+
+ if (!cache->lru_tail) {
+ cache->lru_tail = entry;
+ }
+
+ --cache->lru_remaining;
+}
+
+/** Remove an entry from the LRU list. */
+static void dircache_lru_remove(dircache *cache, dircache_entry *entry) {
+ if (entry->lru_prev) {
+ assert(cache->lru_head != entry);
+ entry->lru_prev->lru_next = entry->lru_next;
+ } else {
+ assert(cache->lru_head == entry);
+ cache->lru_head = entry->lru_next;
+ }
+
+ if (entry->lru_next) {
+ assert(cache->lru_tail != entry);
+ entry->lru_next->lru_prev = entry->lru_prev;
+ } else {
+ assert(cache->lru_tail == entry);
+ cache->lru_tail = entry->lru_prev;
+ }
+
+ entry->lru_prev = entry->lru_next = NULL;
+
+ ++cache->lru_remaining;
+}
+
+/** Close a dircache_entry and remove it from the LRU list. */
+static void dircache_entry_close(dircache *cache, dircache_entry *entry) {
+ dircache_lru_remove(cache, entry);
+ closedir(entry->dir);
+ entry->dir = NULL;
+}
+
+/** POSIX doesn't have this?! */
+static DIR *opendirat(int fd, const char *name) {
+ int dfd = openat(fd, name, O_DIRECTORY);
+ if (dfd < 0) {
+ return NULL;
+ }
+
+ return fdopendir(dfd);
+}
+
+/**
+ * Open a dircache_entry.
+ *
+ * @param cache
+ * The cache containing the entry.
+ * @param entry
+ * The entry to open.
+ * @param[out] path
+ * Will hold the full path to the entry, with a trailing '/'.
+ * @return
+ * The opened DIR *, or NULL on error.
+ */
+static DIR *dircache_entry_open(dircache *cache, dircache_entry *entry, dynstr *path) {
+ assert(!entry->dir);
+
+ if (cache->lru_remaining == 0) {
+ dircache_entry_close(cache, cache->lru_tail);
+ }
+
+ // First, reserve enough space for the path
+ size_t pathsize = 1; // Terminating '\0'
+
+ dircache_entry *parent = entry;
+ do {
+ pathsize += strlen(parent->name) + 1; // name + '/'
+ parent = parent->parent;
+ } while (parent);
+
+ if (dynstr_grow(path, pathsize) != 0) {
+ return NULL;
+ }
+
+ // Now, build the path backwards while looking for a parent
+ char *segment = path->str + pathsize - 1;
+ *segment = '\0';
+
+ int fd = AT_FDCWD;
+ const char *relpath = path->str;
+
+ parent = entry;
+ while (1) {
+ size_t namelen = strlen(parent->name);
+ segment -= namelen + 1;
+ memcpy(segment, parent->name, namelen);
+ segment[namelen] = '/';
+
+ parent = parent->parent;
+ if (!parent) {
+ break;
+ }
+
+ if (parent->dir && fd == AT_FDCWD) {
+ dircache_lru_remove(cache, parent);
+ dircache_lru_add(cache, parent);
+ fd = dirfd(parent->dir);
+ relpath = segment;
+ }
+ }
+
+ DIR *dir = opendirat(fd, relpath);
+ if (dir) {
+ entry->dir = dir;
+ dircache_lru_add(cache, entry);
+ }
+
+ return dir;
+}
+
+/** Free a dircache_entry. */
+static void dircache_entry_free(dircache *cache, dircache_entry *entry) {
+ while (entry) {
+ dircache_entry *saved = entry;
+ entry = entry->parent;
+
+ if (--saved->refcount == 0) {
+ if (saved->dir) {
+ dircache_entry_close(cache, saved);
+ }
+ free(saved);
+ }
+ }
+}
+
+/** The size of a dirqueue block. */
+#define DIRQUEUE_BLOCK_SIZE 1023
+
+/**
+ * A single block in the dirqueue chain.
+ */
+typedef struct dirqueue_block dirqueue_block;
+
+struct dirqueue_block {
+ /** The next block in the chain. */
+ dirqueue_block *next;
+ /** The elements in the queue. */
+ dircache_entry *entries[DIRQUEUE_BLOCK_SIZE];
+};
+
+/**
+ * A queue of 'dircache_entry's to examine.
+ */
+typedef struct {
+ /** The first block. */
+ dirqueue_block *head;
+ /** The last block. */
+ dirqueue_block *tail;
+ /** The index in 'head' of the next entry to read. */
+ size_t front;
+ /** The index in 'tail' of the next entry to write. */
+ size_t back;
+} dirqueue;
+
+/** Initialize a dirqueue. */
+static void dirqueue_init(dirqueue *queue) {
+ queue->head = queue->tail = NULL;
+ queue->front = 0;
+ queue->back = DIRQUEUE_BLOCK_SIZE;
+}
+
+/** Add an entry to the dirqueue. */
+static int dirqueue_push(dirqueue *queue, dircache_entry *entry) {
+ if (queue->back == DIRQUEUE_BLOCK_SIZE) {
+ dirqueue_block *block = malloc(sizeof(dirqueue_block));
+ if (!block) {
+ return -1;
+ }
+
+ block->next = NULL;
+
+ if (queue->tail) {
+ queue->tail->next = block;
+ }
+ queue->tail = block;
+
+ if (!queue->head) {
+ queue->head = block;
+ }
+
+ queue->back = 0;
+ }
+
+ queue->tail->entries[queue->back++] = entry;
+ return 0;
+}
+
+/** Remove an entry from the dirqueue. */
+static dircache_entry *dirqueue_pop(dirqueue *queue) {
+ if (!queue->head) {
+ return NULL;
+ }
+
+ if (queue->head == queue->tail && queue->front == queue->back) {
+ free(queue->head);
+ dirqueue_init(queue);
+ return NULL;
+ }
+
+ dirqueue_block *head = queue->head;
+ dircache_entry *entry = head->entries[queue->front];
+ if (++queue->front == DIRQUEUE_BLOCK_SIZE) {
+ queue->head = head->next;
+ queue->front = 0;
+ free(head);
+ }
+ return entry;
+}
+
+int bftw(const char *dirpath, bftw_fn *fn, int nopenfd, void *ptr) {
+ int ret = -1, err;
+
+ dircache cache;
+ dircache_init(&cache, nopenfd);
+
+ dirqueue queue;
+ dirqueue_init(&queue);
+
+ dynstr path;
+ dynstr_init(&path);
+
+ dircache_entry *current = dircache_add(&cache, NULL, dirpath);
+ if (!current) {
+ goto done;
+ }
+
+ do {
+ DIR *dir = dircache_entry_open(&cache, current, &path);
+ if (!dir) {
+ goto done;
+ }
+
+ size_t pathlen = strlen(path.str);
+
+ struct dirent *de;
+ while ((de = readdir(dir)) != NULL) {
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) {
+ continue;
+ }
+
+ if (dynstr_concat(&path, pathlen, de->d_name) != 0) {
+ goto done;
+ }
+
+ int typeflag = BFTW_UNKNOWN;
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ switch (de->d_type) {
+ case DT_DIR:
+ typeflag = BFTW_D;
+ break;
+ case DT_REG:
+ typeflag = BFTW_R;
+ break;
+ case DT_LNK:
+ typeflag = BFTW_SL;
+ break;
+ }
+#endif
+
+ if (typeflag == BFTW_UNKNOWN) {
+ struct stat sb;
+ if (fstatat(dirfd(dir), de->d_name, &sb, AT_SYMLINK_NOFOLLOW) == 0) {
+ switch (sb.st_mode & S_IFMT) {
+ case S_IFDIR:
+ typeflag = BFTW_D;
+ break;
+ case S_IFREG:
+ typeflag = BFTW_R;
+ break;
+ case S_IFLNK:
+ typeflag = BFTW_SL;
+ break;
+ }
+ }
+ }
+
+ int action = fn(path.str, typeflag, ptr);
+
+ switch (action) {
+ case BFTW_CONTINUE:
+ if (typeflag != BFTW_D) {
+ break;
+ }
+
+ dircache_entry *next = dircache_add(&cache, current, de->d_name);
+ if (!next) {
+ goto done;
+ }
+
+ if (dirqueue_push(&queue, next) != 0) {
+ goto done;
+ }
+ break;
+
+ case BFTW_SKIP_SIBLINGS:
+ goto next;
+
+ case BFTW_SKIP_SUBTREE:
+ break;
+
+ case BFTW_STOP:
+ ret = 0;
+ goto done;
+
+ default:
+ errno = EINVAL;
+ goto done;
+ }
+ }
+
+ next:
+ dircache_entry_free(&cache, current);
+ current = dirqueue_pop(&queue);
+ } while (current);
+
+ ret = 0;
+done:
+ err = errno;
+
+ while (current) {
+ dircache_entry_free(&cache, current);
+ current = dirqueue_pop(&queue);
+ }
+
+ dynstr_free(&path);
+
+ errno = err;
+ return ret;
+}
diff --git a/bftw.h b/bftw.h
new file mode 100644
index 0000000..be538a8
--- /dev/null
+++ b/bftw.h
@@ -0,0 +1,62 @@
+/*********************************************************************
+ * bfs *
+ * Copyright (C) 2015 Tavian Barnes <tavianator@tavianator.com> *
+ * *
+ * This program is free software. It comes without any warranty, to *
+ * the extent permitted by applicable law. You can redistribute it *
+ * and/or modify it under the terms of the Do What The Fuck You Want *
+ * To Public License, Version 2, as published by Sam Hocevar. See *
+ * the COPYING file or http://www.wtfpl.net/ for more details. *
+ *********************************************************************/
+
+/**
+ * Callback function type for bftw().
+ *
+ * @param fpath
+ * The path to the encountered file.
+ * @param typeflag
+ * A typeflag value (see below).
+ * @param ptr
+ * The pointer passed to bftw().
+ * @return
+ * An action value (see below).
+ */
+typedef int bftw_fn(const char *fpath, int typeflag, void *ptr);
+
+/**
+ * Breadth First Tree Walk (or Better File Tree Walk).
+ *
+ * Like ftw(3) and nftw(3), this function walks a directory tree recursively,
+ * and invokes a callback for each path it encounters. However, bftw() operates
+ * breadth-first.
+ *
+ * @param dirpath
+ * The starting path.
+ * @param fn
+ * The callback to invoke.
+ * @param nopenfd
+ * The maximum number of file descriptors to keep open.
+ * @param ptr
+ * A generic pointer which is passed to fn().
+ * @return
+ * 0 on success, or -1 on failure.
+ */
+int bftw(const char *dirpath, bftw_fn *fn, int nopenfd, void *ptr);
+
+/** typeflag: Directory. */
+#define BFTW_D 0
+/** typeflag: Regular file. */
+#define BFTW_R 1
+/** typeflag: Symbolic link. */
+#define BFTW_SL 2
+/** typeflag: Unknown type. */
+#define BFTW_UNKNOWN 3
+
+/** action: Keep walking. */
+#define BFTW_CONTINUE 0
+/** action: Skip this path's siblings. */
+#define BFTW_SKIP_SIBLINGS 1
+/** action: Skip this path's children. */
+#define BFTW_SKIP_SUBTREE 2
+/** action: Stop walking. */
+#define BFTW_STOP 3