summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTavian Barnes <tavianator@tavianator.com>2019-03-04 23:30:33 -0800
committerTavian Barnes <tavianator@tavianator.com>2019-03-04 23:35:04 -0800
commit47f232e8fd307794e20bf0d6c84152f48be18cb3 (patch)
tree279320689d2fdfe749695a436a58714877e6aafc
parent21a9a9bb078c062b7cb13f8d176a3375d7b01eb2 (diff)
downloadbfs-47f232e8fd307794e20bf0d6c84152f48be18cb3.tar.xz
trie: Implement prefix/postfix search
-rw-r--r--trie.c82
-rw-r--r--trie.h24
2 files changed, 106 insertions, 0 deletions
diff --git a/trie.c b/trie.c
index e1bc6b2..425cfe1 100644
--- a/trie.c
+++ b/trie.c
@@ -232,6 +232,88 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t
}
}
+struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) {
+ size_t length = strlen(key);
+ struct trie_leaf *rep = trie_representative(trie, key, length + 1);
+ if (rep && rep->length >= length && memcmp(rep->key, key, length) == 0) {
+ return rep;
+ } else {
+ return NULL;
+ }
+}
+
+/**
+ * Find a leaf that may end at the current node.
+ */
+static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) {
+ // Finding a terminating NUL byte may take two nibbles
+ for (int i = 0; i < 2; ++i) {
+ if (!(node->bitmap & 1)) {
+ break;
+ }
+
+ uintptr_t ptr = node->children[0];
+ if (trie_is_leaf(ptr)) {
+ return trie_decode_leaf(ptr);
+ } else {
+ node = trie_decode_node(ptr);
+ }
+ }
+
+ return NULL;
+}
+
+/** Check if a leaf is a prefix of a search key. */
+static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *key, size_t length) {
+ if (leaf && leaf->length <= length) {
+ return memcmp(key + skip, leaf->key + skip, leaf->length - skip - 1) == 0;
+ } else {
+ return false;
+ }
+}
+
+struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) {
+ uintptr_t ptr = trie->root;
+ if (!ptr) {
+ return NULL;
+ }
+
+ struct trie_leaf *best = NULL;
+ size_t skip = 0;
+ size_t length = strlen(key) + 1;
+
+ size_t offset = 0;
+ while (!trie_is_leaf(ptr)) {
+ struct trie_node *node = trie_decode_node(ptr);
+ offset += node->offset;
+ if ((offset >> 1) >= length) {
+ return best;
+ }
+
+ struct trie_leaf *leaf = trie_terminal_leaf(node);
+ if (trie_check_prefix(leaf, skip, key, length)) {
+ best = leaf;
+ skip = offset >> 1;
+ }
+
+ unsigned char nibble = trie_key_nibble(key, offset);
+ unsigned int bit = 1U << nibble;
+ if (node->bitmap & bit) {
+ unsigned int index = trie_popcount(node->bitmap & (bit - 1));
+ ptr = node->children[index];
+ } else {
+ return best;
+ }
+ }
+
+ struct trie_leaf *leaf = trie_decode_leaf(ptr);
+ if (trie_check_prefix(leaf, skip, key, length)) {
+ best = leaf;
+ }
+
+ return best;
+}
+
/** Create a new leaf, holding a copy of the given key. */
static struct trie_leaf *new_trie_leaf(const void *key, size_t length) {
struct trie_leaf *leaf = malloc(sizeof(*leaf) + length);
diff --git a/trie.h b/trie.h
index 8fcf04b..fcdd630 100644
--- a/trie.h
+++ b/trie.h
@@ -80,6 +80,30 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key);
struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length);
/**
+ * Find the shortest leaf that starts with a given key.
+ *
+ * @param trie
+ * The trie to search.
+ * @param key
+ * The key to look up.
+ * @return
+ * A leaf that starts with the given key, or NULL.
+ */
+struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key);
+
+/**
+ * Find the leaf that is the longest prefix of the given key.
+ *
+ * @param trie
+ * The trie to search.
+ * @param key
+ * The key to look up.
+ * @return
+ * The longest prefix match for the given key, or NULL.
+ */
+struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key);
+
+/**
* Insert a string key into the trie.
*
* @param trie