From 47f232e8fd307794e20bf0d6c84152f48be18cb3 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Mon, 4 Mar 2019 23:30:33 -0800 Subject: trie: Implement prefix/postfix search --- trie.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ trie.h | 24 ++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/trie.c b/trie.c index e1bc6b2..425cfe1 100644 --- a/trie.c +++ b/trie.c @@ -232,6 +232,88 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t } } +struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) { + size_t length = strlen(key); + struct trie_leaf *rep = trie_representative(trie, key, length + 1); + if (rep && rep->length >= length && memcmp(rep->key, key, length) == 0) { + return rep; + } else { + return NULL; + } +} + +/** + * Find a leaf that may end at the current node. + */ +static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) { + // Finding a terminating NUL byte may take two nibbles + for (int i = 0; i < 2; ++i) { + if (!(node->bitmap & 1)) { + break; + } + + uintptr_t ptr = node->children[0]; + if (trie_is_leaf(ptr)) { + return trie_decode_leaf(ptr); + } else { + node = trie_decode_node(ptr); + } + } + + return NULL; +} + +/** Check if a leaf is a prefix of a search key. */ +static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *key, size_t length) { + if (leaf && leaf->length <= length) { + return memcmp(key + skip, leaf->key + skip, leaf->length - skip - 1) == 0; + } else { + return false; + } +} + +struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) { + uintptr_t ptr = trie->root; + if (!ptr) { + return NULL; + } + + struct trie_leaf *best = NULL; + size_t skip = 0; + size_t length = strlen(key) + 1; + + size_t offset = 0; + while (!trie_is_leaf(ptr)) { + struct trie_node *node = trie_decode_node(ptr); + offset += node->offset; + if ((offset >> 1) >= length) { + return best; + } + + struct trie_leaf *leaf = trie_terminal_leaf(node); + if (trie_check_prefix(leaf, skip, key, length)) { + best = leaf; + skip = offset >> 1; + } + + unsigned char nibble = trie_key_nibble(key, offset); + unsigned int bit = 1U << nibble; + if (node->bitmap & bit) { + unsigned int index = trie_popcount(node->bitmap & (bit - 1)); + ptr = node->children[index]; + } else { + return best; + } + } + + struct trie_leaf *leaf = trie_decode_leaf(ptr); + if (trie_check_prefix(leaf, skip, key, length)) { + best = leaf; + } + + return best; +} + /** Create a new leaf, holding a copy of the given key. */ static struct trie_leaf *new_trie_leaf(const void *key, size_t length) { struct trie_leaf *leaf = malloc(sizeof(*leaf) + length); diff --git a/trie.h b/trie.h index 8fcf04b..fcdd630 100644 --- a/trie.h +++ b/trie.h @@ -79,6 +79,30 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key); */ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length); +/** + * Find the shortest leaf that starts with a given key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @return + * A leaf that starts with the given key, or NULL. + */ +struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key); + +/** + * Find the leaf that is the longest prefix of the given key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @return + * The longest prefix match for the given key, or NULL. + */ +struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key); + /** * Insert a string key into the trie. * -- cgit v1.2.3