diff options
Diffstat (limited to 'src/trie.c')
-rw-r--r-- | src/trie.c | 589 |
1 files changed, 339 insertions, 250 deletions
@@ -1,18 +1,5 @@ -/**************************************************************************** - * bfs * - * Copyright (C) 2019 Tavian Barnes <tavianator@tavianator.com> * - * * - * Permission to use, copy, modify, and/or distribute this software for any * - * purpose with or without fee is hereby granted. * - * * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * - ****************************************************************************/ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD /** * This is an implementation of a "qp trie," as documented at @@ -22,29 +9,29 @@ * look like * * A A A A - * *--->*--->*--->*--->$ - * | | | D D - * | | +--->*--->$ - * | | B C D - * | +--->*--->*--->$ - * | D D A A - * +--->*--->*--->*--->$ - * | D D - * +--->*--->$ + * ●───→●───→●───→●───→○ + * │ │ │ D D + * │ │ └───→●───→○ + * │ │ B C D + * │ └───→●───→●───→○ + * │ D D A A + * └───→●───→●───→●───→○ + * │ D D + * └───→●───→○ * * A compressed (PATRICIA) trie collapses internal nodes that have only a single * child, like this: * * A A AA - * *--->*--->*---->$ - * | | | DD - * | | +---->$ - * | | BCD - * | +----->$ - * | DD AA - * +---->*---->$ - * | DD - * +---->$ + * ●───→●───→●────→○ + * │ │ │ DD + * │ │ └────→○ + * │ │ BCD + * │ └─────→○ + * │ DD AA + * └────→●────→○ + * │ DD + * └────→○ * * The nodes can be compressed further by dropping the actual compressed * sequences from the nodes, storing it only in the leaves. This is the @@ -53,21 +40,39 @@ * branch on, need to be stored in each node. * * A A A - * 0--->1--->2--->AAAA - * | | | D - * | | +--->AADD - * | | B - * | +--->ABCD - * | D A - * +--->2--->DDAA - * | D - * +--->DDDD + * 0───→1───→2───→AAAA + * │ │ │ D + * │ │ └───→AADD + * │ │ B + * │ └───→ABCD + * │ D A + * └───→2───→DDAA + * │ D + * └───→DDDD * * Nodes are represented very compactly. Rather than a dense array of children, * a sparse array of only the non-NULL children directly follows the node in - * memory. A bitmap is used to track which children exist; the index of a child - * i is found by counting the number of bits below bit i that are set. A tag - * bit is used to tell pointers to internal nodes apart from pointers to leaves. + * memory. A bitmap is used to track which children exist. + * + * ┌────────────┐ + * │ [4] [3] [2][1][0] ←─ children + * │ ↓ ↓ ↓ ↓ ↓ + * │ 14 10 6 3 0 ←─ sparse index + * │ ↓ ↓ ↓ ↓ ↓ + * │ 0100010001001001 ←─ bitmap + * │ + * │ To convert a sparse index to a dense index, mask off the bits above it, and + * │ count the remaining bits. + * │ + * │ 10 ←─ sparse index + * │ ↓ + * │ 0000001111111111 ←─ mask + * │ & 0100010001001001 ←─ bitmap + * │ ──────────────── + * │ = 0000000001001001 + * │ └──┼──┘ + * │ [3] ←─ dense index + * └───────────────────┘ * * This implementation tests a whole nibble (half byte/hex digit) at every * branch, so the bitmap takes up 16 bits. The remainder of a machine word is @@ -77,24 +82,30 @@ */ #include "trie.h" -#include "util.h" -#include <assert.h> -#include <limits.h> -#include <stdbool.h> + +#include "alloc.h" +#include "bfs.h" +#include "bit.h" +#include "diag.h" +#include "list.h" + #include <stdint.h> -#include <stdlib.h> #include <string.h> -#if CHAR_BIT != 8 -# error "This trie implementation assumes 8-bit bytes." +static_assert(CHAR_WIDTH == 8, "This trie implementation assumes 8-bit bytes."); + +#if __i386__ || __x86_64__ +# define _trie_clones _target_clones("popcnt", "default") +#else +# define _trie_clones #endif /** Number of bits for the sparse array bitmap, aka the range of a nibble. */ -#define BITMAP_BITS 16 +#define BITMAP_WIDTH 16 /** The number of remaining bits in a word, to hold the offset. */ -#define OFFSET_BITS (sizeof(size_t)*CHAR_BIT - BITMAP_BITS) +#define OFFSET_WIDTH (SIZE_WIDTH - BITMAP_WIDTH) /** The highest representable offset (only 64k on a 32-bit architecture). */ -#define OFFSET_MAX (((size_t)1 << OFFSET_BITS) - 1) +#define OFFSET_MAX (((size_t)1 << OFFSET_WIDTH) - 1) /** * An internal node of the trie. @@ -105,88 +116,87 @@ struct trie_node { * Bit i will be set if a child exists at logical index i, and its index * into the array will be popcount(bitmap & ((1 << i) - 1)). */ - size_t bitmap : BITMAP_BITS; + size_t bitmap : BITMAP_WIDTH; /** * The offset into the key in nibbles. This is relative to the parent * node, to support offsets larger than OFFSET_MAX. */ - size_t offset : OFFSET_BITS; + size_t offset : OFFSET_WIDTH; /** * Flexible array of children. Each pointer uses the lowest bit as a * tag to distinguish internal nodes from leaves. This is safe as long * as all dynamic allocations are aligned to more than a single byte. */ - uintptr_t children[]; + uintptr_t children[]; // _counted_by(count_ones(bitmap)) }; -/** Check if an encoded pointer is to a leaf. */ -static bool trie_is_leaf(uintptr_t ptr) { +/** Check if an encoded pointer is to an internal node. */ +static bool trie_is_node(uintptr_t ptr) { return ptr & 1; } -/** Decode a pointer to a leaf. */ -static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) { - assert(trie_is_leaf(ptr)); - return (struct trie_leaf *)(ptr ^ 1); +/** Decode a pointer to an internal node. */ +static struct trie_node *trie_decode_node(uintptr_t ptr) { + bfs_assert(trie_is_node(ptr)); + return (struct trie_node *)(ptr - 1); } -/** Encode a pointer to a leaf. */ -static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) { - uintptr_t ptr = (uintptr_t)leaf ^ 1; - assert(trie_is_leaf(ptr)); +/** Encode a pointer to an internal node. */ +static uintptr_t trie_encode_node(const struct trie_node *node) { + uintptr_t ptr = (uintptr_t)node + 1; + bfs_assert(trie_is_node(ptr)); return ptr; } -/** Decode a pointer to an internal node. */ -static struct trie_node *trie_decode_node(uintptr_t ptr) { - assert(!trie_is_leaf(ptr)); - return (struct trie_node *)ptr; +/** Decode a pointer to a leaf. */ +static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) { + bfs_assert(!trie_is_node(ptr)); + return (struct trie_leaf *)ptr; } -/** Encode a pointer to an internal node. */ -static uintptr_t trie_encode_node(const struct trie_node *node) { - uintptr_t ptr = (uintptr_t)node; - assert(!trie_is_leaf(ptr)); +/** Encode a pointer to a leaf. */ +static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) { + uintptr_t ptr = (uintptr_t)leaf; + bfs_assert(!trie_is_node(ptr)); return ptr; } void trie_init(struct trie *trie) { trie->root = 0; -} - -/** Compute the popcount (Hamming weight) of a bitmap. */ -static unsigned int trie_popcount(unsigned int n) { -#if __POPCNT__ - // Use the x86 instruction if we have it. Otherwise, GCC generates a - // library call, so use the below implementation instead. - return __builtin_popcount(n); -#else - // See https://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation - n -= (n >> 1) & 0x5555; - n = (n & 0x3333) + ((n >> 2) & 0x3333); - n = (n + (n >> 4)) & 0x0F0F; - n = (n + (n >> 8)) & 0xFF; - return n; -#endif + LIST_INIT(trie); + VARENA_INIT(&trie->nodes, struct trie_node, children); + VARENA_INIT(&trie->leaves, struct trie_leaf, key); } /** Extract the nibble at a certain offset from a byte sequence. */ -static unsigned char trie_key_nibble(const void *key, size_t offset) { +static unsigned char trie_key_nibble(const void *key, size_t length, size_t offset) { const unsigned char *bytes = key; - size_t byte = offset >> 1; + size_t byte = offset / 2; + bfs_assert(byte < length); // A branchless version of // if (offset & 1) { - // return bytes[byte] >> 4; - // } else { // return bytes[byte] & 0xF; + // } else { + // return bytes[byte] >> 4; // } - unsigned int shift = (offset & 1) << 2; + unsigned int shift = 4 * ((offset + 1) % 2); return (bytes[byte] >> shift) & 0xF; } +/** Extract the nibble at a certain offset from a leaf. */ +static unsigned char trie_leaf_nibble(const struct trie_leaf *leaf, size_t offset) { + return trie_key_nibble(leaf->key, leaf->length, offset); +} + +/** Get the number of children of an internal node. */ +_trie_clones +static unsigned int trie_node_size(const struct trie_node *node) { + return count_ones((unsigned int)node->bitmap); +} + /** * Finds a leaf in the trie that matches the key at every branch. If the key * exists in the trie, the representative will match the searched key. But @@ -194,24 +204,24 @@ static unsigned char trie_key_nibble(const void *key, size_t offset) { * that case, the first mismatch between the key and the representative will be * the depth at which to make a new branch to insert the key. */ +_trie_clones static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) { uintptr_t ptr = trie->root; - if (!ptr) { - return NULL; - } - size_t offset = 0; - while (!trie_is_leaf(ptr)) { + size_t offset = 0, limit = 2 * length; + while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); offset += node->offset; unsigned int index = 0; - if ((offset >> 1) < length) { - unsigned char nibble = trie_key_nibble(key, offset); + if (offset < limit) { + unsigned char nibble = trie_key_nibble(key, length, offset); unsigned int bit = 1U << nibble; - if (node->bitmap & bit) { - index = trie_popcount(node->bitmap & (bit - 1)); - } + unsigned int map = node->bitmap; + unsigned int bits = map & (bit - 1); + unsigned int mask = -!!(map & bit); + // index = (map & bit) ? count_ones(bits) : 0; + index = count_ones(bits) & mask; } ptr = node->children[index]; } @@ -219,15 +229,12 @@ static struct trie_leaf *trie_representative(const struct trie *trie, const void return trie_decode_leaf(ptr); } -struct trie_leaf *trie_first_leaf(const struct trie *trie) { - return trie_representative(trie, NULL, 0); -} - struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) { return trie_find_mem(trie, key, strlen(key) + 1); } -struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) { +_trie_clones +static struct trie_leaf *trie_find_mem_impl(const struct trie *trie, const void *key, size_t length) { struct trie_leaf *rep = trie_representative(trie, key, length); if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) { return rep; @@ -236,7 +243,22 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t } } -struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) { +struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) { + return trie_find_mem_impl(trie, key, length); +} + +void *trie_get_str(const struct trie *trie, const char *key) { + const struct trie_leaf *leaf = trie_find_str(trie, key); + return leaf ? leaf->value : NULL; +} + +void *trie_get_mem(const struct trie *trie, const void *key, size_t length) { + const struct trie_leaf *leaf = trie_find_mem(trie, key, length); + return leaf ? leaf->value : NULL; +} + +_trie_clones +static struct trie_leaf *trie_find_postfix_impl(const struct trie *trie, const char *key) { size_t length = strlen(key); struct trie_leaf *rep = trie_representative(trie, key, length + 1); if (rep && rep->length >= length && memcmp(rep->key, key, length) == 0) { @@ -246,6 +268,10 @@ struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) { } } +struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) { + return trie_find_postfix_impl(trie, key); +} + /** * Find a leaf that may end at the current node. */ @@ -257,10 +283,10 @@ static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) { } uintptr_t ptr = node->children[0]; - if (trie_is_leaf(ptr)) { - return trie_decode_leaf(ptr); - } else { + if (trie_is_node(ptr)) { node = trie_decode_node(ptr); + } else { + return trie_decode_leaf(ptr); } } @@ -276,7 +302,8 @@ static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *k } } -struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) { +_trie_clones +static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const char *key) { uintptr_t ptr = trie->root; if (!ptr) { return NULL; @@ -286,24 +313,24 @@ struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) { size_t skip = 0; size_t length = strlen(key) + 1; - size_t offset = 0; - while (!trie_is_leaf(ptr)) { + size_t offset = 0, limit = 2 * length; + while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); offset += node->offset; - if ((offset >> 1) >= length) { + if (offset >= limit) { return best; } struct trie_leaf *leaf = trie_terminal_leaf(node); if (trie_check_prefix(leaf, skip, key, length)) { best = leaf; - skip = offset >> 1; + skip = offset / 2; } - unsigned char nibble = trie_key_nibble(key, offset); + unsigned char nibble = trie_key_nibble(key, length, offset); unsigned int bit = 1U << nibble; if (node->bitmap & bit) { - unsigned int index = trie_popcount(node->bitmap & (bit - 1)); + unsigned int index = count_ones(node->bitmap & (bit - 1)); ptr = node->children[index]; } else { return best; @@ -318,55 +345,97 @@ struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) { return best; } +struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) { + return trie_find_prefix_impl(trie, key); +} + /** Create a new leaf, holding a copy of the given key. */ -static struct trie_leaf *new_trie_leaf(const void *key, size_t length) { - struct trie_leaf *leaf = malloc(BFS_FLEX_SIZEOF(struct trie_leaf, key, length)); - if (leaf) { - leaf->value = NULL; - leaf->length = length; - memcpy(leaf->key, key, length); +static struct trie_leaf *trie_leaf_alloc(struct trie *trie, const void *key, size_t length) { + struct trie_leaf *leaf = varena_alloc(&trie->leaves, length); + if (!leaf) { + return NULL; } + + LIST_ITEM_INIT(leaf); + LIST_APPEND(trie, leaf); + + leaf->value = NULL; + leaf->length = length; + memcpy(leaf->key, key, length); + return leaf; } -/** Compute the size of a trie node with a certain number of children. */ -static size_t trie_node_size(unsigned int size) { - // Empty nodes aren't supported - assert(size > 0); - // Node size must be a power of two - assert((size & (size - 1)) == 0); +/** Free a leaf. */ +static void trie_leaf_free(struct trie *trie, struct trie_leaf *leaf) { + LIST_REMOVE(trie, leaf); + varena_free(&trie->leaves, leaf, leaf->length); +} + +/** Create a new node. */ +static struct trie_node *trie_node_alloc(struct trie *trie, size_t size) { + bfs_assert(has_single_bit(size)); + return varena_alloc(&trie->nodes, size); +} + +/** Reallocate a trie node. */ +static struct trie_node *trie_node_realloc(struct trie *trie, struct trie_node *node, size_t old_size, size_t new_size) { + bfs_assert(has_single_bit(old_size)); + bfs_assert(has_single_bit(new_size)); + return varena_realloc(&trie->nodes, node, old_size, new_size); +} - return BFS_FLEX_SIZEOF(struct trie_node, children, size); +/** Free a node. */ +static void trie_node_free(struct trie *trie, struct trie_node *node, size_t size) { + bfs_assert(size == trie_node_size(node)); + varena_free(&trie->nodes, node, size); } /** Find the offset of the first nibble that differs between two keys. */ -static size_t trie_key_mismatch(const void *key1, const void *key2, size_t length) { - const unsigned char *bytes1 = key1; - const unsigned char *bytes2 = key2; - size_t i = 0; - size_t offset = 0; - const size_t chunk = sizeof(size_t); +static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t length) { + if (!rep) { + return 0; + } - for (; i + chunk <= length; i += chunk) { - if (memcmp(bytes1 + i, bytes2 + i, chunk) != 0) { - break; - } + if (rep->length < length) { + length = rep->length; } - for (; i < length; ++i) { - unsigned char b1 = bytes1[i], b2 = bytes2[i]; - if (b1 != b2) { - offset = (b1 & 0xF) == (b2 & 0xF); - break; - } + const char *rep_bytes = rep->key; + const char *key_bytes = key; + + size_t ret = 0, i = 0; + +#define CHUNK(n) CHUNK_(uint##n##_t, load8_beu##n) +#define CHUNK_(type, load8) \ + (length - i >= sizeof(type)) { \ + type rep_chunk = load8(rep_bytes + i); \ + type key_chunk = load8(key_bytes + i); \ + type diff = rep_chunk ^ key_chunk; \ + ret += leading_zeros(diff) / 4; \ + if (diff) { \ + return ret; \ + } \ + i += sizeof(type); \ } - offset |= i << 1; - return offset; +#if SIZE_WIDTH >= 64 + while CHUNK(64); + if CHUNK(32); +#else + while CHUNK(32); +#endif + if CHUNK(16); + if CHUNK(8); + +#undef CHUNK_ +#undef CHUNK + + return ret; } /** - * Insert a key into a node. The node must not have a child in that position + * Insert a leaf into a node. The node must not have a child in that position * already. Effectively takes a subtrie like this: * * ptr @@ -383,41 +452,36 @@ static size_t trie_key_mismatch(const void *key1, const void *key2, size_t lengt * v X * *--->... * | Y - * +--->key + * +--->leaf * | Z * +--->... */ -static struct trie_leaf *trie_node_insert(uintptr_t *ptr, const void *key, size_t length, size_t offset) { +_trie_clones +static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) { struct trie_node *node = trie_decode_node(*ptr); - unsigned int size = trie_popcount(node->bitmap); + unsigned int size = trie_node_size(node); // Double the capacity every power of two - if ((size & (size - 1)) == 0) { - node = realloc(node, trie_node_size(2*size)); + if (has_single_bit(size)) { + node = trie_node_realloc(trie, node, size, 2 * size); if (!node) { + trie_leaf_free(trie, leaf); return NULL; } *ptr = trie_encode_node(node); } - struct trie_leaf *leaf = new_trie_leaf(key, length); - if (!leaf) { - return NULL; - } - - unsigned char nibble = trie_key_nibble(key, offset); unsigned int bit = 1U << nibble; // The child must not already be present - assert(!(node->bitmap & bit)); + bfs_assert(!(node->bitmap & bit)); node->bitmap |= bit; - unsigned int index = trie_popcount(node->bitmap & (bit - 1)); - uintptr_t *child = &node->children[index]; - if (index < size) { - memmove(child + 1, child, (size - index)*sizeof(*child)); + unsigned int target = count_ones(node->bitmap & (bit - 1)); + for (size_t i = size; i > target; --i) { + node->children[i] = node->children[i - 1]; } - *child = trie_encode_leaf(leaf); + node->children[target] = trie_encode_leaf(leaf); return leaf; } @@ -446,12 +510,12 @@ static struct trie_leaf *trie_node_insert(uintptr_t *ptr, const void *key, size_ * | Y * +--->key */ -static uintptr_t *trie_jump(uintptr_t *ptr, const char *key, size_t *offset) { +static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, size_t *offset) { // We only ever need to jump to leaf nodes, since internal nodes are // guaranteed to be within OFFSET_MAX anyway - assert(trie_is_leaf(*ptr)); + struct trie_leaf *leaf = trie_decode_leaf(*ptr); - struct trie_node *node = malloc(trie_node_size(1)); + struct trie_node *node = trie_node_alloc(trie, 1); if (!node) { return NULL; } @@ -459,7 +523,7 @@ static uintptr_t *trie_jump(uintptr_t *ptr, const char *key, size_t *offset) { *offset += OFFSET_MAX; node->offset = OFFSET_MAX; - unsigned char nibble = trie_key_nibble(key, *offset); + unsigned char nibble = trie_leaf_nibble(leaf, *offset); node->bitmap = 1 << nibble; node->children[0] = *ptr; @@ -482,28 +546,23 @@ static uintptr_t *trie_jump(uintptr_t *ptr, const char *key, size_t *offset) { * v X * *--->*...>--->rep * | Y - * +--->key + * +--->leaf */ -static struct trie_leaf *trie_split(uintptr_t *ptr, const void *key, size_t length, struct trie_leaf *rep, size_t offset, size_t mismatch) { - unsigned char key_nibble = trie_key_nibble(key, mismatch); - unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch); - assert(key_nibble != rep_nibble); +static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, struct trie_leaf *rep, size_t offset, size_t mismatch) { + unsigned char key_nibble = trie_leaf_nibble(leaf, mismatch); + unsigned char rep_nibble = trie_leaf_nibble(rep, mismatch); + bfs_assert(key_nibble != rep_nibble); - struct trie_node *node = malloc(trie_node_size(2)); + struct trie_node *node = trie_node_alloc(trie, 2); if (!node) { - return NULL; - } - - struct trie_leaf *leaf = new_trie_leaf(key, length); - if (!leaf) { - free(node); + trie_leaf_free(trie, leaf); return NULL; } node->bitmap = (1 << key_nibble) | (1 << rep_nibble); size_t delta = mismatch - offset; - if (!trie_is_leaf(*ptr)) { + if (trie_is_node(*ptr)) { struct trie_node *child = trie_decode_node(*ptr); child->offset -= delta; } @@ -520,66 +579,99 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) { return trie_insert_mem(trie, key, strlen(key) + 1); } -struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length) { +_trie_clones +static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key, size_t length) { struct trie_leaf *rep = trie_representative(trie, key, length); - if (!rep) { - struct trie_leaf *leaf = new_trie_leaf(key, length); - if (leaf) { - trie->root = trie_encode_leaf(leaf); - } - return leaf; + size_t mismatch = trie_mismatch(rep, key, length); + size_t misbyte = mismatch / 2; + if (misbyte >= length) { + bfs_assert(misbyte == length); + return rep; + } else if (rep && misbyte >= rep->length) { + bfs_bug("trie keys must be prefix-free"); + errno = EINVAL; + return NULL; } - size_t limit = length < rep->length ? length : rep->length; - size_t mismatch = trie_key_mismatch(key, rep->key, limit); - if ((mismatch >> 1) >= length) { - return rep; + struct trie_leaf *leaf = trie_leaf_alloc(trie, key, length); + if (!leaf) { + return NULL; + } + + if (!rep) { + trie->root = trie_encode_leaf(leaf); + return leaf; } size_t offset = 0; uintptr_t *ptr = &trie->root; - while (!trie_is_leaf(*ptr)) { + while (trie_is_node(*ptr)) { struct trie_node *node = trie_decode_node(*ptr); if (offset + node->offset > mismatch) { break; } offset += node->offset; - unsigned char nibble = trie_key_nibble(key, offset); + unsigned char nibble = trie_leaf_nibble(leaf, offset); unsigned int bit = 1U << nibble; if (node->bitmap & bit) { - assert(offset < mismatch); - unsigned int index = trie_popcount(node->bitmap & (bit - 1)); + bfs_assert(offset < mismatch); + unsigned int index = count_ones(node->bitmap & (bit - 1)); ptr = &node->children[index]; } else { - assert(offset == mismatch); - return trie_node_insert(ptr, key, length, offset); + bfs_assert(offset == mismatch); + return trie_node_insert(trie, ptr, leaf, nibble); } } while (mismatch - offset > OFFSET_MAX) { - ptr = trie_jump(ptr, key, &offset); + ptr = trie_jump(trie, ptr, &offset); if (!ptr) { + trie_leaf_free(trie, leaf); return NULL; } } - return trie_split(ptr, key, length, rep, offset, mismatch); + return trie_split(trie, ptr, leaf, rep, offset, mismatch); +} + +struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length) { + return trie_insert_mem_impl(trie, key, length); +} + +int trie_set_str(struct trie *trie, const char *key, const void *value) { + struct trie_leaf *leaf = trie_insert_str(trie, key); + if (leaf) { + leaf->value = (void *)value; + return 0; + } else { + return -1; + } +} + +int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value) { + struct trie_leaf *leaf = trie_insert_mem(trie, key, length); + if (leaf) { + leaf->value = (void *)value; + return 0; + } else { + return -1; + } } /** Free a chain of singleton nodes. */ -static void trie_free_singletons(uintptr_t ptr) { - while (!trie_is_leaf(ptr)) { +static void trie_free_singletons(struct trie *trie, uintptr_t ptr) { + while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); // Make sure the bitmap is a power of two, i.e. it has just one child - assert((node->bitmap & (node->bitmap - 1)) == 0); + bfs_assert(has_single_bit((size_t)node->bitmap)); ptr = node->children[0]; - free(node); + trie_node_free(trie, node, 1); } - free(trie_decode_leaf(ptr)); + trie_leaf_free(trie, trie_decode_leaf(ptr)); } /** @@ -599,9 +691,9 @@ static void trie_free_singletons(uintptr_t ptr) { * v * other */ -static int trie_collapse_node(uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) { +static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) { uintptr_t other = parent_node->children[child_index ^ 1]; - if (!trie_is_leaf(other)) { + if (trie_is_node(other)) { struct trie_node *other_node = trie_decode_node(other); if (other_node->offset + parent_node->offset <= OFFSET_MAX) { other_node->offset += parent_node->offset; @@ -611,28 +703,28 @@ static int trie_collapse_node(uintptr_t *parent, struct trie_node *parent_node, } *parent = other; - free(parent_node); + trie_node_free(trie, parent_node, 2); return 0; } -void trie_remove(struct trie *trie, struct trie_leaf *leaf) { +_trie_clones +static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) { uintptr_t *child = &trie->root; uintptr_t *parent = NULL; unsigned int child_bit = 0, child_index = 0; size_t offset = 0; - while (!trie_is_leaf(*child)) { + while (trie_is_node(*child)) { struct trie_node *node = trie_decode_node(*child); offset += node->offset; - assert((offset >> 1) < leaf->length); - unsigned char nibble = trie_key_nibble(leaf->key, offset); + unsigned char nibble = trie_leaf_nibble(leaf, offset); unsigned int bit = 1U << nibble; unsigned int bitmap = node->bitmap; - assert(bitmap & bit); - unsigned int index = trie_popcount(bitmap & (bit - 1)); + bfs_assert(bitmap & bit); + unsigned int index = count_ones(bitmap & (bit - 1)); // Advance the parent pointer, unless this node had only one child - if (bitmap & (bitmap - 1)) { + if (!has_single_bit(bitmap)) { parent = child; child_bit = bit; child_index = index; @@ -641,53 +733,50 @@ void trie_remove(struct trie *trie, struct trie_leaf *leaf) { child = &node->children[index]; } - assert(trie_decode_leaf(*child) == leaf); + bfs_assert(trie_decode_leaf(*child) == leaf); if (!parent) { - trie_free_singletons(trie->root); + trie_free_singletons(trie, trie->root); trie->root = 0; return; } struct trie_node *node = trie_decode_node(*parent); - child = node->children + child_index; - trie_free_singletons(*child); + trie_free_singletons(trie, node->children[child_index]); - node->bitmap ^= child_bit; - unsigned int parent_size = trie_popcount(node->bitmap); - assert(parent_size > 0); - if (parent_size == 1 && trie_collapse_node(parent, node, child_index) == 0) { + unsigned int parent_size = trie_node_size(node); + bfs_assert(parent_size > 1); + if (parent_size == 2 && trie_collapse_node(trie, parent, node, child_index) == 0) { return; } - if (child_index < parent_size) { - memmove(child, child + 1, (parent_size - child_index)*sizeof(*child)); + for (size_t i = child_index; i + 1 < parent_size; ++i) { + node->children[i] = node->children[i + 1]; } + node->bitmap &= ~child_bit; + --parent_size; - if ((parent_size & (parent_size - 1)) == 0) { - node = realloc(node, trie_node_size(parent_size)); + if (has_single_bit(parent_size)) { + node = trie_node_realloc(trie, node, 2 * parent_size, parent_size); if (node) { *parent = trie_encode_node(node); } } } -/** Free an encoded pointer to a node. */ -static void free_trie_ptr(uintptr_t ptr) { - if (trie_is_leaf(ptr)) { - free(trie_decode_leaf(ptr)); - } else { - struct trie_node *node = trie_decode_node(ptr); - size_t size = trie_popcount(node->bitmap); - for (size_t i = 0; i < size; ++i) { - free_trie_ptr(node->children[i]); - } - free(node); - } +void trie_remove(struct trie *trie, struct trie_leaf *leaf) { + trie_remove_impl(trie, leaf); +} + +void trie_clear(struct trie *trie) { + trie->root = 0; + LIST_INIT(trie); + + varena_clear(&trie->leaves); + varena_clear(&trie->nodes); } void trie_destroy(struct trie *trie) { - if (trie->root) { - free_trie_ptr(trie->root); - } + varena_destroy(&trie->leaves); + varena_destroy(&trie->nodes); } |