diff options
Diffstat (limited to 'src/trie.c')
-rw-r--r-- | src/trie.c | 138 |
1 files changed, 74 insertions, 64 deletions
@@ -81,21 +81,23 @@ * and insert intermediate singleton "jump" nodes when necessary. */ -#include "prelude.h" #include "trie.h" + #include "alloc.h" +#include "bfs.h" #include "bit.h" #include "diag.h" #include "list.h" + #include <stdint.h> #include <string.h> -bfs_static_assert(CHAR_WIDTH == 8); +static_assert(CHAR_WIDTH == 8, "This trie implementation assumes 8-bit bytes."); #if __i386__ || __x86_64__ -# define trie_clones attr(target_clones("popcnt", "default")) +# define _trie_clones _target_clones("popcnt", "default") #else -# define trie_clones +# define _trie_clones #endif /** Number of bits for the sparse array bitmap, aka the range of a nibble. */ @@ -130,34 +132,34 @@ struct trie_node { uintptr_t children[]; }; -/** Check if an encoded pointer is to a leaf. */ -static bool trie_is_leaf(uintptr_t ptr) { +/** Check if an encoded pointer is to an internal node. */ +static bool trie_is_node(uintptr_t ptr) { return ptr & 1; } -/** Decode a pointer to a leaf. */ -static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) { - bfs_assert(trie_is_leaf(ptr)); - return (struct trie_leaf *)(ptr ^ 1); +/** Decode a pointer to an internal node. */ +static struct trie_node *trie_decode_node(uintptr_t ptr) { + bfs_assert(trie_is_node(ptr)); + return (struct trie_node *)(ptr - 1); } -/** Encode a pointer to a leaf. */ -static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) { - uintptr_t ptr = (uintptr_t)leaf ^ 1; - bfs_assert(trie_is_leaf(ptr)); +/** Encode a pointer to an internal node. */ +static uintptr_t trie_encode_node(const struct trie_node *node) { + uintptr_t ptr = (uintptr_t)node + 1; + bfs_assert(trie_is_node(ptr)); return ptr; } -/** Decode a pointer to an internal node. */ -static struct trie_node *trie_decode_node(uintptr_t ptr) { - bfs_assert(!trie_is_leaf(ptr)); - return (struct trie_node *)ptr; +/** Decode a pointer to a leaf. */ +static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) { + bfs_assert(!trie_is_node(ptr)); + return (struct trie_leaf *)ptr; } -/** Encode a pointer to an internal node. */ -static uintptr_t trie_encode_node(const struct trie_node *node) { - uintptr_t ptr = (uintptr_t)node; - bfs_assert(!trie_is_leaf(ptr)); +/** Encode a pointer to a leaf. */ +static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) { + uintptr_t ptr = (uintptr_t)leaf; + bfs_assert(!trie_is_node(ptr)); return ptr; } @@ -169,9 +171,10 @@ void trie_init(struct trie *trie) { } /** Extract the nibble at a certain offset from a byte sequence. */ -static unsigned char trie_key_nibble(const void *key, size_t offset) { +static unsigned char trie_key_nibble(const void *key, size_t length, size_t offset) { const unsigned char *bytes = key; size_t byte = offset >> 1; + bfs_assert(byte < length); // A branchless version of // if (offset & 1) { @@ -183,6 +186,11 @@ static unsigned char trie_key_nibble(const void *key, size_t offset) { return (bytes[byte] >> shift) & 0xF; } +/** Extract the nibble at a certain offset from a leaf. */ +static unsigned char trie_leaf_nibble(const struct trie_leaf *leaf, size_t offset) { + return trie_key_nibble(leaf->key, leaf->length, offset); +} + /** * Finds a leaf in the trie that matches the key at every branch. If the key * exists in the trie, the representative will match the searched key. But @@ -190,21 +198,18 @@ static unsigned char trie_key_nibble(const void *key, size_t offset) { * that case, the first mismatch between the key and the representative will be * the depth at which to make a new branch to insert the key. */ -trie_clones +_trie_clones static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) { uintptr_t ptr = trie->root; - if (!ptr) { - return NULL; - } size_t offset = 0; - while (!trie_is_leaf(ptr)) { + while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); offset += node->offset; unsigned int index = 0; if ((offset >> 1) < length) { - unsigned char nibble = trie_key_nibble(key, offset); + unsigned char nibble = trie_key_nibble(key, length, offset); unsigned int bit = 1U << nibble; // bits = bitmap & bit ? bitmap & (bit - 1) : 0 unsigned int mask = -!!(node->bitmap & bit); @@ -221,7 +226,7 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) { return trie_find_mem(trie, key, strlen(key) + 1); } -trie_clones +_trie_clones static struct trie_leaf *trie_find_mem_impl(const struct trie *trie, const void *key, size_t length) { struct trie_leaf *rep = trie_representative(trie, key, length); if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) { @@ -235,7 +240,7 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t return trie_find_mem_impl(trie, key, length); } -trie_clones +_trie_clones static struct trie_leaf *trie_find_postfix_impl(const struct trie *trie, const char *key) { size_t length = strlen(key); struct trie_leaf *rep = trie_representative(trie, key, length + 1); @@ -261,10 +266,10 @@ static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) { } uintptr_t ptr = node->children[0]; - if (trie_is_leaf(ptr)) { - return trie_decode_leaf(ptr); - } else { + if (trie_is_node(ptr)) { node = trie_decode_node(ptr); + } else { + return trie_decode_leaf(ptr); } } @@ -280,7 +285,7 @@ static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *k } } -trie_clones +_trie_clones static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const char *key) { uintptr_t ptr = trie->root; if (!ptr) { @@ -292,7 +297,7 @@ static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const ch size_t length = strlen(key) + 1; size_t offset = 0; - while (!trie_is_leaf(ptr)) { + while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); offset += node->offset; if ((offset >> 1) >= length) { @@ -305,7 +310,7 @@ static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const ch skip = offset >> 1; } - unsigned char nibble = trie_key_nibble(key, offset); + unsigned char nibble = trie_key_nibble(key, length, offset); unsigned int bit = 1U << nibble; if (node->bitmap & bit) { unsigned int index = count_ones(node->bitmap & (bit - 1)); @@ -438,7 +443,7 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t * | Z * +--->... */ -trie_clones +_trie_clones static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) { struct trie_node *node = trie_decode_node(*ptr); unsigned int size = count_ones(node->bitmap); @@ -492,10 +497,10 @@ static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, str * | Y * +--->key */ -static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, size_t *offset) { +static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, size_t *offset) { // We only ever need to jump to leaf nodes, since internal nodes are // guaranteed to be within OFFSET_MAX anyway - bfs_assert(trie_is_leaf(*ptr)); + struct trie_leaf *leaf = trie_decode_leaf(*ptr); struct trie_node *node = trie_node_alloc(trie, 1); if (!node) { @@ -505,7 +510,7 @@ static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, *offset += OFFSET_MAX; node->offset = OFFSET_MAX; - unsigned char nibble = trie_key_nibble(key, *offset); + unsigned char nibble = trie_leaf_nibble(leaf, *offset); node->bitmap = 1 << nibble; node->children[0] = *ptr; @@ -531,8 +536,8 @@ static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, * +--->leaf */ static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, struct trie_leaf *rep, size_t offset, size_t mismatch) { - unsigned char key_nibble = trie_key_nibble(leaf->key, mismatch); - unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch); + unsigned char key_nibble = trie_leaf_nibble(leaf, mismatch); + unsigned char rep_nibble = trie_leaf_nibble(rep, mismatch); bfs_assert(key_nibble != rep_nibble); struct trie_node *node = trie_node_alloc(trie, 2); @@ -544,7 +549,7 @@ static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct tr node->bitmap = (1 << key_nibble) | (1 << rep_nibble); size_t delta = mismatch - offset; - if (!trie_is_leaf(*ptr)) { + if (trie_is_node(*ptr)) { struct trie_node *child = trie_decode_node(*ptr); child->offset -= delta; } @@ -561,12 +566,18 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) { return trie_insert_mem(trie, key, strlen(key) + 1); } -trie_clones +_trie_clones static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key, size_t length) { struct trie_leaf *rep = trie_representative(trie, key, length); size_t mismatch = trie_mismatch(rep, key, length); - if (mismatch >= (length << 1)) { + size_t misbyte = mismatch / 2; + if (misbyte >= length) { + bfs_assert(misbyte == length); return rep; + } else if (rep && misbyte >= rep->length) { + bfs_bug("trie keys must be prefix-free"); + errno = EINVAL; + return NULL; } struct trie_leaf *leaf = trie_leaf_alloc(trie, key, length); @@ -581,14 +592,14 @@ static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key size_t offset = 0; uintptr_t *ptr = &trie->root; - while (!trie_is_leaf(*ptr)) { + while (trie_is_node(*ptr)) { struct trie_node *node = trie_decode_node(*ptr); if (offset + node->offset > mismatch) { break; } offset += node->offset; - unsigned char nibble = trie_key_nibble(key, offset); + unsigned char nibble = trie_leaf_nibble(leaf, offset); unsigned int bit = 1U << nibble; if (node->bitmap & bit) { bfs_assert(offset < mismatch); @@ -601,7 +612,7 @@ static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key } while (mismatch - offset > OFFSET_MAX) { - ptr = trie_jump(trie, ptr, key, &offset); + ptr = trie_jump(trie, ptr, &offset); if (!ptr) { trie_leaf_free(trie, leaf); return NULL; @@ -617,11 +628,11 @@ struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t len /** Free a chain of singleton nodes. */ static void trie_free_singletons(struct trie *trie, uintptr_t ptr) { - while (!trie_is_leaf(ptr)) { + while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); // Make sure the bitmap is a power of two, i.e. it has just one child - bfs_assert(has_single_bit(node->bitmap)); + bfs_assert(has_single_bit((size_t)node->bitmap)); ptr = node->children[0]; trie_node_free(trie, node, 1); @@ -649,7 +660,7 @@ static void trie_free_singletons(struct trie *trie, uintptr_t ptr) { */ static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) { uintptr_t other = parent_node->children[child_index ^ 1]; - if (!trie_is_leaf(other)) { + if (trie_is_node(other)) { struct trie_node *other_node = trie_decode_node(other); if (other_node->offset + parent_node->offset <= OFFSET_MAX) { other_node->offset += parent_node->offset; @@ -659,22 +670,21 @@ static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_ } *parent = other; - trie_node_free(trie, parent_node, 1); + trie_node_free(trie, parent_node, 2); return 0; } -trie_clones +_trie_clones static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) { uintptr_t *child = &trie->root; uintptr_t *parent = NULL; unsigned int child_bit = 0, child_index = 0; size_t offset = 0; - while (!trie_is_leaf(*child)) { + while (trie_is_node(*child)) { struct trie_node *node = trie_decode_node(*child); offset += node->offset; - bfs_assert((offset >> 1) < leaf->length); - unsigned char nibble = trie_key_nibble(leaf->key, offset); + unsigned char nibble = trie_leaf_nibble(leaf, offset); unsigned int bit = 1U << nibble; unsigned int bitmap = node->bitmap; bfs_assert(bitmap & bit); @@ -699,19 +709,19 @@ static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) { } struct trie_node *node = trie_decode_node(*parent); - child = node->children + child_index; - trie_free_singletons(trie, *child); + trie_free_singletons(trie, node->children[child_index]); - node->bitmap ^= child_bit; unsigned int parent_size = count_ones(node->bitmap); - bfs_assert(parent_size > 0); - if (parent_size == 1 && trie_collapse_node(trie, parent, node, child_index) == 0) { + bfs_assert(parent_size > 1); + if (parent_size == 2 && trie_collapse_node(trie, parent, node, child_index) == 0) { return; } - if (child_index < parent_size) { - memmove(child, child + 1, (parent_size - child_index) * sizeof(*child)); + for (size_t i = child_index; i + 1 < parent_size; ++i) { + node->children[i] = node->children[i + 1]; } + node->bitmap &= ~child_bit; + --parent_size; if (has_single_bit(parent_size)) { node = trie_node_realloc(trie, node, 2 * parent_size, parent_size); |