summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTavian Barnes <tavianator@tavianator.com>2019-03-01 21:53:13 -0800
committerTavian Barnes <tavianator@tavianator.com>2019-03-04 23:34:39 -0800
commit8ec4f46f31b1d2f71d193952e595e2037fe7f22d (patch)
tree3e94a9a405587319168ba58558cc1d1681dabb85
parentb921fb2f43869ed12822ee99a80ccba9fd67c640 (diff)
downloadbfs-8ec4f46f31b1d2f71d193952e595e2037fe7f22d.tar.xz
trie: Revamp the API to support mappings
-rw-r--r--eval.c13
-rw-r--r--trie.c217
-rw-r--r--trie.h66
3 files changed, 127 insertions, 169 deletions
diff --git a/eval.c b/eval.c
index 432adfc..d4097ef 100644
--- a/eval.c
+++ b/eval.c
@@ -1147,14 +1147,17 @@ static bool eval_file_unique(struct eval_state *state, struct trie *seen) {
memcpy(id, &statbuf->dev, sizeof(statbuf->dev));
memcpy(id + sizeof(statbuf->dev), &statbuf->ino, sizeof(statbuf->ino));
- int ret = trie_meminsert(seen, id, sizeof(id));
- if (ret > 0) {
- state->action = BFTW_SKIP_SUBTREE;
- return false;
- } else if (ret < 0) {
+ struct trie_leaf *leaf = trie_insert_mem(seen, id, sizeof(id));
+ if (!leaf) {
eval_report_error(state);
return false;
+ }
+
+ if (leaf->value) {
+ state->action = BFTW_SKIP_SUBTREE;
+ return false;
} else {
+ leaf->value = leaf;
return true;
}
}
diff --git a/trie.c b/trie.c
index 75acd22..fffa4ac 100644
--- a/trie.c
+++ b/trie.c
@@ -96,7 +96,7 @@
#define OFFSET_MAX (((size_t)1 << OFFSET_BITS) - 1)
/**
- * A node in the trie.
+ * An internal node of the trie.
*/
struct trie_node {
/**
@@ -126,13 +126,13 @@ static bool trie_is_leaf(uintptr_t ptr) {
}
/** Decode a pointer to a leaf. */
-static void *trie_decode_leaf(uintptr_t ptr) {
+static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) {
assert(trie_is_leaf(ptr));
- return (void *)(ptr ^ 1);
+ return (struct trie_leaf *)(ptr ^ 1);
}
/** Encode a pointer to a leaf. */
-static uintptr_t trie_encode_leaf(const void *leaf) {
+static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) {
uintptr_t ptr = (uintptr_t)leaf ^ 1;
assert(trie_is_leaf(ptr));
return ptr;
@@ -186,8 +186,14 @@ static unsigned char trie_key_nibble(const void *key, size_t offset) {
return (bytes[byte] >> shift) & 0xF;
}
-/** Find a leaf that might be the given key, if present. */
-static const void *trie_search(const struct trie *trie, const void *key, size_t size) {
+/**
+ * Finds a leaf in the trie that matches the key at every branch. If the key
+ * exists in the trie, the representative will match the searched key. But
+ * since only branch points are tested, it can be different from the key. In
+ * that case, the first mismatch between the key and the representative will be
+ * the depth at which to make a new branch to insert the key.
+ */
+static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) {
uintptr_t ptr = trie->root;
if (!ptr) {
return NULL;
@@ -197,45 +203,44 @@ static const void *trie_search(const struct trie *trie, const void *key, size_t
while (!trie_is_leaf(ptr)) {
struct trie_node *node = trie_decode_node(ptr);
offset += node->offset;
- if ((offset >> 1) >= size) {
- return NULL;
- }
- unsigned char nibble = trie_key_nibble(key, offset);
+ unsigned char nibble = 0;
+ if ((offset >> 1) < length) {
+ nibble = trie_key_nibble(key, offset);
+ }
unsigned int bit = 1U << nibble;
+ unsigned int index = 0;
if (node->bitmap & bit) {
- unsigned int index = trie_popcount(node->bitmap & (bit - 1));
- ptr = node->children[index];
- } else {
- return NULL;
+ index = trie_popcount(node->bitmap & (bit - 1));
}
+ ptr = node->children[index];
}
return trie_decode_leaf(ptr);
}
-bool trie_strsearch(const struct trie *trie, const char *key) {
- const char *rep = trie_search(trie, key, strlen(key) + 1);
- return rep && strcmp(key, rep) == 0;
+struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) {
+ return trie_find_mem(trie, key, strlen(key) + 1);
}
-bool trie_strnsearch(const struct trie *trie, const char *key, size_t size) {
- const char *rep = trie_search(trie, key, size);
- return rep && strncmp(key, rep, size) == 0;
-}
-
-bool trie_memsearch(const struct trie *trie, const void *key, size_t size) {
- const void *rep = trie_search(trie, key, size);
- return rep && memcmp(key, rep, size) == 0;
+struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) {
+ struct trie_leaf *rep = trie_representative(trie, key, length);
+ if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) {
+ return rep;
+ } else {
+ return NULL;
+ }
}
/** Create a new leaf, holding a copy of the given key. */
-static uintptr_t new_trie_leaf(const void *key, size_t size) {
- void *leaf = malloc(size);
+static struct trie_leaf *new_trie_leaf(const void *key, size_t length) {
+ struct trie_leaf *leaf = malloc(sizeof(*leaf) + length);
if (leaf) {
- memcpy(leaf, key, size);
+ leaf->value = NULL;
+ leaf->length = length;
+ memcpy(leaf->key, key, length);
}
- return trie_encode_leaf(leaf);
+ return leaf;
}
/** Compute the size of a trie node with a certain number of children. */
@@ -248,50 +253,21 @@ static size_t trie_node_size(unsigned int size) {
return sizeof(struct trie_node) + size*sizeof(uintptr_t);
}
-/**
- * Finds a leaf in the trie that matches the key at every branch. If the key
- * exists in the trie, the representative will match the searched key. But
- * since only branch points are tested, it can be different from the key. In
- * that case, the first mismatch between the key and the representative will be
- * the depth at which to make a new branch to insert the key.
- */
-static const void *trie_representative(const struct trie *trie, const void *key, size_t size) {
- size_t offset = 0;
- uintptr_t ptr = trie->root;
- while (!trie_is_leaf(ptr)) {
- struct trie_node *node = trie_decode_node(ptr);
- offset += node->offset;
-
- unsigned char nibble = 0;
- if ((offset >> 1) < size) {
- nibble = trie_key_nibble(key, offset);
- }
- unsigned int bit = 1U << nibble;
- unsigned int index = 0;
- if (node->bitmap & bit) {
- index = trie_popcount(node->bitmap & (bit - 1));
- }
- ptr = node->children[index];
- }
-
- return trie_decode_leaf(ptr);
-}
-
/** Find the offset of the first nibble that differs between two keys. */
-static size_t trie_key_mismatch(const void *key1, const void *key2, size_t size) {
+static size_t trie_key_mismatch(const void *key1, const void *key2, size_t length) {
const unsigned char *bytes1 = key1;
const unsigned char *bytes2 = key2;
size_t i = 0;
size_t offset = 0;
const size_t chunk = sizeof(size_t);
- for (; i + chunk <= size; i += chunk) {
+ for (; i + chunk <= length; i += chunk) {
if (memcmp(bytes1 + i, bytes2 + i, chunk) != 0) {
break;
}
}
- for (; i < size; ++i) {
+ for (; i < length; ++i) {
unsigned char b1 = bytes1[i], b2 = bytes2[i];
if (b1 != b2) {
offset = (b1 & 0xF) == (b2 & 0xF);
@@ -325,7 +301,7 @@ static size_t trie_key_mismatch(const void *key1, const void *key2, size_t size)
* | Z
* +--->...
*/
-static int trie_node_insert(uintptr_t *ptr, const void *key, size_t key_size, size_t offset) {
+static struct trie_leaf *trie_node_insert(uintptr_t *ptr, const void *key, size_t length, size_t offset) {
struct trie_node *node = trie_decode_node(*ptr);
unsigned int size = trie_popcount(node->bitmap);
@@ -333,14 +309,14 @@ static int trie_node_insert(uintptr_t *ptr, const void *key, size_t key_size, si
if ((size & (size - 1)) == 0) {
node = realloc(node, trie_node_size(2*size));
if (!node) {
- return -1;
+ return NULL;
}
*ptr = trie_encode_node(node);
}
- uintptr_t leaf = new_trie_leaf(key, key_size);
+ struct trie_leaf *leaf = new_trie_leaf(key, length);
if (!leaf) {
- return -1;
+ return NULL;
}
unsigned char nibble = trie_key_nibble(key, offset);
@@ -355,8 +331,8 @@ static int trie_node_insert(uintptr_t *ptr, const void *key, size_t key_size, si
if (index < size) {
memmove(child + 1, child, (size - index)*sizeof(*child));
}
- *child = leaf;
- return 0;
+ *child = trie_encode_leaf(leaf);
+ return leaf;
}
/**
@@ -370,21 +346,21 @@ static int trie_node_insert(uintptr_t *ptr, const void *key, size_t key_size, si
*
* and changes it to:
*
- * ptr
- * |
- * v
- * *--->*--->rep
+ * ptr ret
+ * | |
+ * v v
+ * *--->*--->rep
*
* so that a new key can be inserted like:
*
- * ptr
- * |
- * v X
- * *--->*--->rep
- * | Y
- * +--->key
+ * ptr ret
+ * | |
+ * v v X
+ * *--->*--->rep
+ * | Y
+ * +--->key
*/
-static uintptr_t *trie_jump(uintptr_t *ptr, const char *key, size_t size, size_t *offset) {
+static uintptr_t *trie_jump(uintptr_t *ptr, const char *key, size_t *offset) {
// We only ever need to jump to leaf nodes, since internal nodes are
// guaranteed to be within OFFSET_MAX anyway
assert(trie_is_leaf(*ptr));
@@ -422,20 +398,20 @@ static uintptr_t *trie_jump(uintptr_t *ptr, const char *key, size_t size, size_t
* | Y
* +--->key
*/
-static int trie_split(uintptr_t *ptr, const void *key, size_t size, const void *rep, size_t offset, size_t mismatch) {
+static struct trie_leaf *trie_split(uintptr_t *ptr, const void *key, size_t length, struct trie_leaf *rep, size_t offset, size_t mismatch) {
unsigned char key_nibble = trie_key_nibble(key, mismatch);
- unsigned char rep_nibble = trie_key_nibble(rep, mismatch);
+ unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch);
assert(key_nibble != rep_nibble);
struct trie_node *node = malloc(trie_node_size(2));
if (!node) {
- return -1;
+ return NULL;
}
- uintptr_t leaf = new_trie_leaf(key, size);
+ struct trie_leaf *leaf = new_trie_leaf(key, length);
if (!leaf) {
free(node);
- return -1;
+ return NULL;
}
node->bitmap = (1 << key_nibble) | (1 << rep_nibble);
@@ -448,32 +424,30 @@ static int trie_split(uintptr_t *ptr, const void *key, size_t size, const void *
node->offset = delta;
unsigned int key_index = key_nibble > rep_nibble;
- node->children[key_index] = leaf;
+ node->children[key_index] = trie_encode_leaf(leaf);
node->children[key_index ^ 1] = *ptr;
*ptr = trie_encode_node(node);
- return 0;
+ return leaf;
}
-/**
- * Insert a key into the trie.
- *
- * @param trie
- * The trie to modify.
- * @param key
- * The key to insert.
- * @param size
- * The size of the key in bytes.
- * @param rep
- * The representative for the key.
- * @param mismatch
- * The index of the first mismatched nibble between the key and the
- * representative.
- * @return
- * 0 on success, 1 if the key was already present, or -1 on error.
- */
-static int trie_insert(struct trie *trie, const void *key, size_t size, const void *rep, size_t mismatch) {
- if ((mismatch >> 1) >= size) {
- return 1;
+struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) {
+ return trie_insert_mem(trie, key, strlen(key) + 1);
+}
+
+struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length) {
+ struct trie_leaf *rep = trie_representative(trie, key, length);
+ if (!rep) {
+ struct trie_leaf *leaf = new_trie_leaf(key, length);
+ if (leaf) {
+ trie->root = trie_encode_leaf(leaf);
+ }
+ return leaf;
+ }
+
+ size_t limit = length < rep->length ? length : rep->length;
+ size_t mismatch = trie_key_mismatch(key, rep->key, limit);
+ if ((mismatch >> 1) >= length) {
+ return rep;
}
size_t offset = 0;
@@ -493,43 +467,18 @@ static int trie_insert(struct trie *trie, const void *key, size_t size, const vo
ptr = node->children + index;
} else {
assert(offset == mismatch);
- return trie_node_insert(ptr, key, size, offset);
+ return trie_node_insert(ptr, key, length, offset);
}
}
while (mismatch - offset > OFFSET_MAX) {
- ptr = trie_jump(ptr, key, size, &offset);
+ ptr = trie_jump(ptr, key, &offset);
if (!ptr) {
- return -1;
+ return NULL;
}
}
- return trie_split(ptr, key, size, rep, offset, mismatch);
-}
-
-int trie_strinsert(struct trie *trie, const char *key) {
- size_t size = strlen(key) + 1;
-
- if (!trie->root) {
- trie->root = new_trie_leaf(key, size);
- return trie->root ? 0 : -1;
- }
-
- const char *rep = trie_representative(trie, key, size);
- size_t limit = strnlen(rep, size - 1) + 1;
- size_t mismatch = trie_key_mismatch(key, rep, limit);
- return trie_insert(trie, key, size, rep, mismatch);
-}
-
-int trie_meminsert(struct trie *trie, const void *key, size_t size) {
- if (!trie->root) {
- trie->root = new_trie_leaf(key, size);
- return trie->root ? 0 : -1;
- }
-
- const void *rep = trie_representative(trie, key, size);
- size_t mismatch = trie_key_mismatch(key, rep, size);
- return trie_insert(trie, key, size, rep, mismatch);
+ return trie_split(ptr, key, length, rep, offset, mismatch);
}
/** Free an encoded pointer to a node. */
diff --git a/trie.h b/trie.h
index e81f80e..fd91f0c 100644
--- a/trie.h
+++ b/trie.h
@@ -22,82 +22,88 @@
#include <stdint.h>
/**
- * A trie that holds a set of fixed- or dynamic-length strings.
+ * A trie that holds a set of fixed- or variable-length strings.
*/
struct trie {
uintptr_t root;
};
/**
- * Initialize an empty trie.
+ * A leaf of a trie.
*/
-void trie_init(struct trie *trie);
+struct trie_leaf {
+ /**
+ * An arbitrary value associated with this leaf.
+ */
+ const void *value;
+
+ /**
+ * The length of the key in bytes.
+ */
+ size_t length;
+
+ /**
+ * The key itself, stored inline.
+ */
+ char key[];
+};
/**
- * Check if a NUL-terminated string key exists in a trie.
- *
- * @param trie
- * The trie to search.
- * @param key
- * The key to search for.
- * @return
- * Whether the key was found.
+ * Initialize an empty trie.
*/
-bool trie_strsearch(const struct trie *trie, const char *key);
+void trie_init(struct trie *trie);
/**
- * Check if a prefix of a NUL-terminated string key exists in a trie.
+ * Find the leaf for a string key.
*
* @param trie
* The trie to search.
* @param key
- * The key to search for.
- * @param size
- * The length of the prefix to consider, in bytes.
+ * The key to look up.
* @return
- * Whether the key was found.
+ * The found leaf, or NULL if the key is not present.
*/
-bool trie_strnsearch(const struct trie *trie, const char *key, size_t size);
+struct trie_leaf *trie_find_str(const struct trie *trie, const char *key);
/**
- * Check if a fixed-length key exists in a trie.
+ * Find the leaf for a fixed-size key.
*
* @param trie
* The trie to search.
* @param key
- * The key to search for.
- * @param size
+ * The key to look up.
+ * @param length
* The length of the key in bytes.
* @return
- * Whether the key was found.
+ * The found leaf, or NULL if the key is not present.
*/
-bool trie_memsearch(const struct trie *trie, const void *key, size_t size);
+struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length);
/**
- * Insert a NUL-terminated string key in the trie.
+ * Insert a string key into the trie.
*
* @param trie
* The trie to modify.
* @param key
* The key to insert.
* @return
- * 0 on success, 1 if the key was already present, or -1 on error.
+ * The inserted leaf, or NULL on failure.
*/
-int trie_strinsert(struct trie *trie, const char *key);
+struct trie_leaf *trie_insert_str(struct trie *trie, const char *key);
/**
- * Insert a fixed-length key in the trie.
+ * Insert a fixed-size key into the trie.
*
* @param trie
* The trie to modify.
* @param key
* The key to insert.
- * @param size
+ * @param length
* The length of the key in bytes.
* @return
- * 0 on success, 1 if the key was already present, or -1 on error.
+ * The inserted leaf, or NULL on failure.
*/
-int trie_meminsert(struct trie *trie, const void *key, size_t size);
+struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length);
/**
* Destroy a trie and its contents.