summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tavian Barnes <tavianator@tavianator.com> 2016-02-19 20:15:26 -0500
committer: Tavian Barnes <tavianator@tavianator.com> 2016-02-19 20:15:26 -0500
commit: 8ac7f4f334668a9ad966f36bcf125e1b7652af80 (patch)
tree: a5e37b07e215aecc2dda5e1f70892d5cda304381
parent: cf9fd80ece99d69cc809daf75f53705dbf64ea0e (diff)
download: bfs-8ac7f4f334668a9ad966f36bcf125e1b7652af80.tar.xz
bftw: Don't keep DIR*'s around.
DIR*'s were being kept around so dirfd(dir) could be passed to future openat() calls. But DIR*'s are big, holding a cache of filenames etc. read by readdir(). Instead, store the raw fd and dup() it to open a DIR* with fdopendir(). This way we can call closedir() as soon as possible, while still keeping an open fd. Ideally there would be a way to closedir() without invoking close() on the underlying fd, but this is a good approximation. Reduces memory footprint by around 64% in a large directory tree.
-rw-r--r-- bftw.c 115
1 files changed, 75 insertions, 40 deletions
diff --git a/bftw.c b/bftw.c
index 466a9d8..eb745fe 100644
--- a/bftw.c
+++ b/bftw.c
@@ -100,8 +100,8 @@ struct dircache_entry {
/** Next node in the LRU list. */
struct dircache_entry *lru_next;
- /** The DIR pointer, if open. */
- DIR *dir;
+ /** An open file descriptor to this directory, or 0. */
+ int fd;
/** Reference count. */
size_t refcount;
@@ -165,7 +165,7 @@ static struct dircache_entry *dircache_add(struct dircache *cache, struct dircac
}
entry->lru_prev = entry->lru_next = NULL;
- entry->dir = NULL;
+ entry->fd = 0;
entry->refcount = 1;
memcpy(entry->name, name, namelen);
@@ -185,7 +185,7 @@ static struct dircache_entry *dircache_add(struct dircache *cache, struct dircac
/** Add an entry to the head of the LRU list. */
static void dircache_lru_add(struct dircache *cache, struct dircache_entry *entry) {
- assert(entry->dir);
+ assert(entry->fd);
assert(entry->lru_prev == NULL);
assert(entry->lru_next == NULL);
@@ -229,22 +229,8 @@ static void dircache_lru_remove(struct dircache *cache, struct dircache_entry *e
/** Close a dircache_entry and remove it from the LRU list. */
static void dircache_entry_close(struct dircache *cache, struct dircache_entry *entry) {
dircache_lru_remove(cache, entry);
- closedir(entry->dir);
- entry->dir = NULL;
-}
-
-/** POSIX doesn't have this?! */
-static DIR *opendirat(int fd, const char *name) {
- int dfd = openat(fd, name, O_DIRECTORY);
- if (dfd < 0) {
- return NULL;
- }
-
- DIR *dir = fdopendir(dfd);
- if (!dir) {
- close(dfd);
- }
- return dir;
+ close(entry->fd);
+ entry->fd = 0;
}
/**
@@ -295,13 +281,13 @@ static struct dircache_entry *dircache_entry_base(struct dircache *cache, struct
do {
base = base->parent;
- } while (base && !base->dir);
+ } while (base && !base->fd);
if (base) {
dircache_lru_remove(cache, base);
dircache_lru_add(cache, base);
- *at_fd = dirfd(base->dir);
+ *at_fd = base->fd;
*at_path += base->nameoff + base->namelen;
}
@@ -309,6 +295,25 @@ static struct dircache_entry *dircache_entry_base(struct dircache *cache, struct
}
/**
+ * Check if we should retry an operation due to EMFILE.
+ *
+ * @param cache
+ * The cache in question.
+ * @param save
+ * A dircache_entry that must be preserved.
+ */
+static bool dircache_should_retry(struct dircache *cache, const struct dircache_entry *save) {
+ if (errno == EMFILE && cache->lru_tail && cache->lru_tail != save) {
+ // Too many open files, shrink the LRU cache
+ dircache_entry_close(cache, cache->lru_tail);
+ --cache->lru_remaining;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+/**
* Open a dircache_entry.
*
* @param cache
@@ -321,33 +326,51 @@ static struct dircache_entry *dircache_entry_base(struct dircache *cache, struct
* The opened DIR *, or NULL on error.
*/
static DIR *dircache_entry_open(struct dircache *cache, struct dircache_entry *entry, const char *path) {
- assert(!entry->dir);
+ assert(!entry->fd);
- if (cache->lru_remaining == 0) {
- dircache_entry_close(cache, cache->lru_tail);
- }
+ DIR *dir = NULL;
int at_fd = AT_FDCWD;
const char *at_path = path;
struct dircache_entry *base = dircache_entry_base(cache, entry, &at_fd, &at_path);
- DIR *dir = opendirat(at_fd, at_path);
-
- if (!dir
- && errno == EMFILE
- && cache->lru_tail
- && cache->lru_tail != base) {
- // Too many open files, shrink the LRU cache
+ if (cache->lru_remaining == 0) {
dircache_entry_close(cache, cache->lru_tail);
- --cache->lru_remaining;
- dir = opendirat(at_fd, at_path);
}
- if (dir) {
- entry->dir = dir;
- dircache_lru_add(cache, entry);
+ int flags = O_DIRECTORY;
+ int fd = openat(at_fd, at_path, flags);
+
+ if (fd < 0 && dircache_should_retry(cache, base)) {
+ fd = openat(at_fd, at_path, flags);
}
+ if (fd < 0) {
+ goto done;
+ }
+
+ entry->fd = fd;
+ dircache_lru_add(cache, entry);
+
+ // Now we dup() the fd and pass it to fdopendir(). This way we can
+ // close the DIR* as soon as we're done with it, reducing the memory
+ // footprint significantly, while keeping the fd around for future
+ // openat() calls.
+ fd = dup(entry->fd);
+
+ if (fd < 0 && dircache_should_retry(cache, entry)) {
+ fd = dup(entry->fd);
+ }
+ if (fd < 0) {
+ goto done;
+ }
+
+ dir = fdopendir(fd);
+ if (!dir) {
+ close(fd);
+ }
+
+done:
return dir;
}
@@ -356,9 +379,10 @@ static void dircache_entry_free(struct dircache *cache, struct dircache_entry *e
if (entry) {
assert(entry->refcount == 0);
- if (entry->dir) {
+ if (entry->fd) {
dircache_entry_close(cache, entry);
}
+
free(entry);
}
}
@@ -556,7 +580,12 @@ static void bftw_state_init(struct bftw_state *state, bftw_fn *fn, int nopenfd,
state->error = 0;
- dircache_init(&state->cache, nopenfd);
+ size_t lru_size = nopenfd;
+ if (lru_size > 1) {
+ // 1 extra to account for dup()
+ --lru_size;
+ }
+ dircache_init(&state->cache, lru_size);
dirqueue_init(&state->queue);
state->current = NULL;
@@ -890,15 +919,18 @@ int bftw(const char *path, bftw_fn *fn, int nopenfd, enum bftw_flags flags, void
break;
case BFTW_SKIP_SIBLINGS:
+ closedir(dir);
goto next;
case BFTW_SKIP_SUBTREE:
continue;
case BFTW_STOP:
+ closedir(dir);
goto done;
case BFTW_FAIL:
+ closedir(dir);
goto fail;
}
@@ -911,11 +943,14 @@ int bftw(const char *path, bftw_fn *fn, int nopenfd, enum bftw_flags flags, void
}
if (bftw_push(&state, de->d_name) != 0) {
+ closedir(dir);
goto fail;
}
}
}
+ closedir(dir);
+
next:
switch (bftw_pop(&state, true)) {
case BFTW_CONTINUE: