summaryrefslogtreecommitdiffstats
path: root/src/trie.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/trie.c')
-rw-r--r--src/trie.c138
1 files changed, 74 insertions, 64 deletions
diff --git a/src/trie.c b/src/trie.c
index 7504d53..a7498d4 100644
--- a/src/trie.c
+++ b/src/trie.c
@@ -81,21 +81,23 @@
* and insert intermediate singleton "jump" nodes when necessary.
*/
-#include "prelude.h"
#include "trie.h"
+
#include "alloc.h"
+#include "bfs.h"
#include "bit.h"
#include "diag.h"
#include "list.h"
+
#include <stdint.h>
#include <string.h>
-bfs_static_assert(CHAR_WIDTH == 8);
+static_assert(CHAR_WIDTH == 8, "This trie implementation assumes 8-bit bytes.");
#if __i386__ || __x86_64__
-# define trie_clones attr(target_clones("popcnt", "default"))
+# define _trie_clones _target_clones("popcnt", "default")
#else
-# define trie_clones
+# define _trie_clones
#endif
/** Number of bits for the sparse array bitmap, aka the range of a nibble. */
@@ -130,34 +132,34 @@ struct trie_node {
uintptr_t children[];
};
-/** Check if an encoded pointer is to a leaf. */
-static bool trie_is_leaf(uintptr_t ptr) {
+/** Check if an encoded pointer is to an internal node. */
+static bool trie_is_node(uintptr_t ptr) {
return ptr & 1;
}
-/** Decode a pointer to a leaf. */
-static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) {
- bfs_assert(trie_is_leaf(ptr));
- return (struct trie_leaf *)(ptr ^ 1);
+/** Decode a pointer to an internal node. */
+static struct trie_node *trie_decode_node(uintptr_t ptr) {
+ bfs_assert(trie_is_node(ptr));
+ return (struct trie_node *)(ptr - 1);
}
-/** Encode a pointer to a leaf. */
-static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) {
- uintptr_t ptr = (uintptr_t)leaf ^ 1;
- bfs_assert(trie_is_leaf(ptr));
+/** Encode a pointer to an internal node. */
+static uintptr_t trie_encode_node(const struct trie_node *node) {
+ uintptr_t ptr = (uintptr_t)node + 1;
+ bfs_assert(trie_is_node(ptr));
return ptr;
}
-/** Decode a pointer to an internal node. */
-static struct trie_node *trie_decode_node(uintptr_t ptr) {
- bfs_assert(!trie_is_leaf(ptr));
- return (struct trie_node *)ptr;
+/** Decode a pointer to a leaf. */
+static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) {
+ bfs_assert(!trie_is_node(ptr));
+ return (struct trie_leaf *)ptr;
}
-/** Encode a pointer to an internal node. */
-static uintptr_t trie_encode_node(const struct trie_node *node) {
- uintptr_t ptr = (uintptr_t)node;
- bfs_assert(!trie_is_leaf(ptr));
+/** Encode a pointer to a leaf. */
+static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) {
+ uintptr_t ptr = (uintptr_t)leaf;
+ bfs_assert(!trie_is_node(ptr));
return ptr;
}
@@ -169,9 +171,10 @@ void trie_init(struct trie *trie) {
}
/** Extract the nibble at a certain offset from a byte sequence. */
-static unsigned char trie_key_nibble(const void *key, size_t offset) {
+static unsigned char trie_key_nibble(const void *key, size_t length, size_t offset) {
const unsigned char *bytes = key;
size_t byte = offset >> 1;
+ bfs_assert(byte < length);
// A branchless version of
// if (offset & 1) {
@@ -183,6 +186,11 @@ static unsigned char trie_key_nibble(const void *key, size_t offset) {
return (bytes[byte] >> shift) & 0xF;
}
+/** Extract the nibble at a certain offset from a leaf. */
+static unsigned char trie_leaf_nibble(const struct trie_leaf *leaf, size_t offset) {
+ return trie_key_nibble(leaf->key, leaf->length, offset);
+}
+
/**
* Finds a leaf in the trie that matches the key at every branch. If the key
* exists in the trie, the representative will match the searched key. But
@@ -190,21 +198,18 @@ static unsigned char trie_key_nibble(const void *key, size_t offset) {
* that case, the first mismatch between the key and the representative will be
* the depth at which to make a new branch to insert the key.
*/
-trie_clones
+_trie_clones
static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) {
uintptr_t ptr = trie->root;
- if (!ptr) {
- return NULL;
- }
size_t offset = 0;
- while (!trie_is_leaf(ptr)) {
+ while (trie_is_node(ptr)) {
struct trie_node *node = trie_decode_node(ptr);
offset += node->offset;
unsigned int index = 0;
if ((offset >> 1) < length) {
- unsigned char nibble = trie_key_nibble(key, offset);
+ unsigned char nibble = trie_key_nibble(key, length, offset);
unsigned int bit = 1U << nibble;
// bits = bitmap & bit ? bitmap & (bit - 1) : 0
unsigned int mask = -!!(node->bitmap & bit);
@@ -221,7 +226,7 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) {
return trie_find_mem(trie, key, strlen(key) + 1);
}
-trie_clones
+_trie_clones
static struct trie_leaf *trie_find_mem_impl(const struct trie *trie, const void *key, size_t length) {
struct trie_leaf *rep = trie_representative(trie, key, length);
if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) {
@@ -235,7 +240,7 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t
return trie_find_mem_impl(trie, key, length);
}
-trie_clones
+_trie_clones
static struct trie_leaf *trie_find_postfix_impl(const struct trie *trie, const char *key) {
size_t length = strlen(key);
struct trie_leaf *rep = trie_representative(trie, key, length + 1);
@@ -261,10 +266,10 @@ static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) {
}
uintptr_t ptr = node->children[0];
- if (trie_is_leaf(ptr)) {
- return trie_decode_leaf(ptr);
- } else {
+ if (trie_is_node(ptr)) {
node = trie_decode_node(ptr);
+ } else {
+ return trie_decode_leaf(ptr);
}
}
@@ -280,7 +285,7 @@ static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *k
}
}
-trie_clones
+_trie_clones
static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const char *key) {
uintptr_t ptr = trie->root;
if (!ptr) {
@@ -292,7 +297,7 @@ static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const ch
size_t length = strlen(key) + 1;
size_t offset = 0;
- while (!trie_is_leaf(ptr)) {
+ while (trie_is_node(ptr)) {
struct trie_node *node = trie_decode_node(ptr);
offset += node->offset;
if ((offset >> 1) >= length) {
@@ -305,7 +310,7 @@ static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const ch
skip = offset >> 1;
}
- unsigned char nibble = trie_key_nibble(key, offset);
+ unsigned char nibble = trie_key_nibble(key, length, offset);
unsigned int bit = 1U << nibble;
if (node->bitmap & bit) {
unsigned int index = count_ones(node->bitmap & (bit - 1));
@@ -438,7 +443,7 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
* | Z
* +--->...
*/
-trie_clones
+_trie_clones
static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) {
struct trie_node *node = trie_decode_node(*ptr);
unsigned int size = count_ones(node->bitmap);
@@ -492,10 +497,10 @@ static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, str
* | Y
* +--->key
*/
-static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, size_t *offset) {
+static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, size_t *offset) {
// We only ever need to jump to leaf nodes, since internal nodes are
// guaranteed to be within OFFSET_MAX anyway
- bfs_assert(trie_is_leaf(*ptr));
+ struct trie_leaf *leaf = trie_decode_leaf(*ptr);
struct trie_node *node = trie_node_alloc(trie, 1);
if (!node) {
@@ -505,7 +510,7 @@ static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key,
*offset += OFFSET_MAX;
node->offset = OFFSET_MAX;
- unsigned char nibble = trie_key_nibble(key, *offset);
+ unsigned char nibble = trie_leaf_nibble(leaf, *offset);
node->bitmap = 1 << nibble;
node->children[0] = *ptr;
@@ -531,8 +536,8 @@ static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key,
* +--->leaf
*/
static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, struct trie_leaf *rep, size_t offset, size_t mismatch) {
- unsigned char key_nibble = trie_key_nibble(leaf->key, mismatch);
- unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch);
+ unsigned char key_nibble = trie_leaf_nibble(leaf, mismatch);
+ unsigned char rep_nibble = trie_leaf_nibble(rep, mismatch);
bfs_assert(key_nibble != rep_nibble);
struct trie_node *node = trie_node_alloc(trie, 2);
@@ -544,7 +549,7 @@ static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct tr
node->bitmap = (1 << key_nibble) | (1 << rep_nibble);
size_t delta = mismatch - offset;
- if (!trie_is_leaf(*ptr)) {
+ if (trie_is_node(*ptr)) {
struct trie_node *child = trie_decode_node(*ptr);
child->offset -= delta;
}
@@ -561,12 +566,18 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) {
return trie_insert_mem(trie, key, strlen(key) + 1);
}
-trie_clones
+_trie_clones
static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key, size_t length) {
struct trie_leaf *rep = trie_representative(trie, key, length);
size_t mismatch = trie_mismatch(rep, key, length);
- if (mismatch >= (length << 1)) {
+ size_t misbyte = mismatch / 2;
+ if (misbyte >= length) {
+ bfs_assert(misbyte == length);
return rep;
+ } else if (rep && misbyte >= rep->length) {
+ bfs_bug("trie keys must be prefix-free");
+ errno = EINVAL;
+ return NULL;
}
struct trie_leaf *leaf = trie_leaf_alloc(trie, key, length);
@@ -581,14 +592,14 @@ static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key
size_t offset = 0;
uintptr_t *ptr = &trie->root;
- while (!trie_is_leaf(*ptr)) {
+ while (trie_is_node(*ptr)) {
struct trie_node *node = trie_decode_node(*ptr);
if (offset + node->offset > mismatch) {
break;
}
offset += node->offset;
- unsigned char nibble = trie_key_nibble(key, offset);
+ unsigned char nibble = trie_leaf_nibble(leaf, offset);
unsigned int bit = 1U << nibble;
if (node->bitmap & bit) {
bfs_assert(offset < mismatch);
@@ -601,7 +612,7 @@ static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key
}
while (mismatch - offset > OFFSET_MAX) {
- ptr = trie_jump(trie, ptr, key, &offset);
+ ptr = trie_jump(trie, ptr, &offset);
if (!ptr) {
trie_leaf_free(trie, leaf);
return NULL;
@@ -617,11 +628,11 @@ struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t len
/** Free a chain of singleton nodes. */
static void trie_free_singletons(struct trie *trie, uintptr_t ptr) {
- while (!trie_is_leaf(ptr)) {
+ while (trie_is_node(ptr)) {
struct trie_node *node = trie_decode_node(ptr);
// Make sure the bitmap is a power of two, i.e. it has just one child
- bfs_assert(has_single_bit(node->bitmap));
+ bfs_assert(has_single_bit((size_t)node->bitmap));
ptr = node->children[0];
trie_node_free(trie, node, 1);
@@ -649,7 +660,7 @@ static void trie_free_singletons(struct trie *trie, uintptr_t ptr) {
*/
static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) {
uintptr_t other = parent_node->children[child_index ^ 1];
- if (!trie_is_leaf(other)) {
+ if (trie_is_node(other)) {
struct trie_node *other_node = trie_decode_node(other);
if (other_node->offset + parent_node->offset <= OFFSET_MAX) {
other_node->offset += parent_node->offset;
@@ -659,22 +670,21 @@ static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_
}
*parent = other;
- trie_node_free(trie, parent_node, 1);
+ trie_node_free(trie, parent_node, 2);
return 0;
}
-trie_clones
+_trie_clones
static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) {
uintptr_t *child = &trie->root;
uintptr_t *parent = NULL;
unsigned int child_bit = 0, child_index = 0;
size_t offset = 0;
- while (!trie_is_leaf(*child)) {
+ while (trie_is_node(*child)) {
struct trie_node *node = trie_decode_node(*child);
offset += node->offset;
- bfs_assert((offset >> 1) < leaf->length);
- unsigned char nibble = trie_key_nibble(leaf->key, offset);
+ unsigned char nibble = trie_leaf_nibble(leaf, offset);
unsigned int bit = 1U << nibble;
unsigned int bitmap = node->bitmap;
bfs_assert(bitmap & bit);
@@ -699,19 +709,19 @@ static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) {
}
struct trie_node *node = trie_decode_node(*parent);
- child = node->children + child_index;
- trie_free_singletons(trie, *child);
+ trie_free_singletons(trie, node->children[child_index]);
- node->bitmap ^= child_bit;
unsigned int parent_size = count_ones(node->bitmap);
- bfs_assert(parent_size > 0);
- if (parent_size == 1 && trie_collapse_node(trie, parent, node, child_index) == 0) {
+ bfs_assert(parent_size > 1);
+ if (parent_size == 2 && trie_collapse_node(trie, parent, node, child_index) == 0) {
return;
}
- if (child_index < parent_size) {
- memmove(child, child + 1, (parent_size - child_index) * sizeof(*child));
+ for (size_t i = child_index; i + 1 < parent_size; ++i) {
+ node->children[i] = node->children[i + 1];
}
+ node->bitmap &= ~child_bit;
+ --parent_size;
if (has_single_bit(parent_size)) {
node = trie_node_realloc(trie, node, 2 * parent_size, parent_size);