/*
 * Patch metadata that shared this line with the code below (reflowed, text unchanged):
 *
 * From c27c7d3648c645deff1f184a7c2b6dc14206ab28 Mon Sep 17 00:00:00 2001
 * From: Joeri Exelmans
 * Date: Sat, 10 May 2025 02:03:41 +0200
 * Subject: [PATCH] add purely functional Trie
 * ---
 *  lib/util/trie.js | 161 +++++++++++++++++++++++++++++++++++++++++++++++
 *  tests/trie.js | 32 ++++++++++
 *  2 files changed, 193 insertions(+)
 *  create mode 100644 lib/util/trie.js
 *  create mode 100644 tests/trie.js
 *
 * diff --git a/lib/util/trie.js b/lib/util/trie.js
 * new file mode 100644
 * index 0000000..7222583
 * --- /dev/null
 * +++ b/lib/util/trie.js
 * @@ -0,0 +1,161 @@
 */

// A purely functional Trie
// https://en.wikipedia.org/wiki/Trie

/**
 * The empty trie: a node without children (and without a `value` property).
 * `children` is a list of `[key, node]` pairs.
 */
export const emptyTrie = {
  children: [],
};

/**
 * Find the maximal common prefix of two strings, and whether string A is
 * smaller than string B.
 *
 * Characters are compared one UTF-16 code unit at a time. When neither string
 * differs within the shorter length, A counts as smaller only if it is the
 * shorter of the two (a proper prefix of B).
 *
 * @param {string} strA
 * @param {string} strB
 * @returns {[string, boolean]} `[commonPrefix, aIsSmaller]`
 */
const commonPrefix = (strA, strB) => {
  const limit = Math.min(strA.length, strB.length);
  for (let i = 0; i < limit; i++) {
    const charA = strA[i];
    const charB = strB[i];
    if (charA !== charB) {
      // first differing position decides both the prefix and the ordering
      return [strA.slice(0, i), charA < charB];
    }
  }
  return [strA.slice(0, limit), strA.length < strB.length];
};

/**
 * A funny kind of binary search, that assumes 'ls' is a sorted list of
 * `[key, ...]` entries, and none of the keys have a common prefix.
 *
 * @param {Array<[string, *]>} ls sorted entries; only element `[0]` (the key) is read
 * @param {string} key
 * @returns {[number, string]} `[index, prefix]`. If some entry shares a
 *   non-empty prefix with `key`, `index` is that entry's position and `prefix`
 *   the shared prefix. Otherwise `prefix` is "" and `index` is the position at
 *   which `key` would have to be inserted to keep `ls` sorted (possibly
 *   `ls.length`).
 */
const binarySearch = (ls, key) => {
  return __binarySearch(ls, key, 0, ls.length);
};

/**
 * Search the half-open range `[min, max)` of `ls`; see `binarySearch`.
 */
const __binarySearch = (ls, key, min, max) => {
  let lo = min;
  let hi = max;
  while (lo < hi) {
    const middle = Math.floor((lo + hi) / 2);
    const [prefix, smaller] = commonPrefix(key, ls[middle][0]);
    if (prefix.length > 0) {
      return [middle, prefix];
    }
    if (smaller) {
      // key was smaller than middle
      hi = middle;
    } else {
      lo = middle + 1;
    }
  }
  // empty range: report the insertion point instead of going out of bounds
  return [hi, ""];
};

// insert (key,value) into trie.
+export const insert = trie => key => value => { + if (key.length === 0) { + // set value of current node + return { + value, + children: trie.children, + }; + } + + const [insertPos, prefix] = binarySearch(trie.children, key); + if (insertPos === trie.children.length) { + // insert node at end + return { + value: trie.value, + children: [ + ...trie.children, + [key, {value, children:[]}], + ], + }; + } + if (prefix.length === 0) { + // nothing in common... + // insert new node into children + return { + value: trie.value, + children: trie.children.toSpliced( + insertPos, // insert position + 0, // delete nothing + [key, {value, children:[]}], + ), + }; + } + + const [haveKey, haveChildNode] = trie.children[insertPos]; + if (prefix.length === haveKey.length) { + // recurse + return { + value: trie.value, + children: trie.children.with( + insertPos, // position to update + [haveKey, insert(haveChildNode)(key.slice(prefix.length))(value)], + ) + } + } + + // otherwise, split entry: + const havePostFix = haveKey.slice(prefix.length); + const postFix = key.slice(prefix.length); + return { + value: trie.value, + children: trie.children.with( + insertPos, // position to update + [prefix, { + children: [ + [havePostFix, haveChildNode], + [postFix, {value, children: []} + ], + ]}], + ), + }; +}; + +// given a prefix, return a string X such that prefix+X is a possibly larger prefix for the same entries as the original prefix. 
/**
 * Given a prefix, return a string X such that prefix+X is a possibly larger
 * prefix for the same entries as the original prefix (for auto-complete).
 *
 * Curried: `growKey(trie)(key)`.
 *
 * @param {{value?: *, children: Array}} trie
 * @returns {function(string): string} function taking the prefix and
 *   returning the extension X; "" when the prefix cannot be grown.
 */
export const growKey = trie => key => {
  const [pos, prefix] = binarySearch(trie.children, key);
  if (prefix.length === 0 || pos === trie.children.length) {
    return "";
  }
  const [haveKey, haveChildNode] = trie.children[pos];
  if (key.length < haveKey.length && haveKey.startsWith(key)) {
    // key stops in the middle of this edge: the rest of the edge is implied
    return haveKey.slice(key.length);
  }
  if (key.length > haveKey.length && key.startsWith(haveKey)) {
    // key continues past this edge: keep looking below it
    return growKey(haveChildNode)(key.slice(haveKey.length));
  }
  return "";
};

/**
 * Get array of (key, value) entries whose keys are prefixed by 'key'.
 *
 * Curried: `suggest(trie)(key)(maxSuggestions)`.
 *
 * @param {{value?: *, children: Array}} trie
 * @returns {function(string): function(number): Array<[string, *]>}
 *   function taking the prefix, then the maximum number of entries, and
 *   returning at most that many `[fullKey, value]` pairs in trie order.
 */
export const suggest = trie => key => maxSuggestions => {
  return __suggest(trie, "", key, maxSuggestions);
};

/**
 * Worker for `suggest`.
 *
 * @param trie node currently being visited
 * @param {string} path concatenation of the keys followed to reach `trie`
 * @param {string} remaining part of the requested prefix not yet matched
 * @param {number} maxSuggestions number of entries still wanted
 * @returns {Array<[string, *]>}
 */
const __suggest = (trie, path, remaining, maxSuggestions) => {
  if (maxSuggestions === 0) {
    return [];
  }

  if (remaining === "") {
    // whole prefix matched: collect this node and everything below it
    const results = [];
    let budget = maxSuggestions;
    if (trie.value !== undefined) {
      results.push([path, trie.value]);
      budget--;
    }
    for (const [haveKey, haveChildNode] of trie.children) {
      const moreSuggestions = __suggest(haveChildNode, path + haveKey, "", budget);
      results.push(...moreSuggestions);
      budget -= moreSuggestions.length;
      if (budget === 0) {
        break;
      }
    }
    return results;
  }

  const [pos, prefix] = binarySearch(trie.children, remaining);
  if (prefix.length === 0) {
    return [];
  }
  const [haveKey, haveChildNode] = trie.children[pos];
  if (prefix.length < Math.min(remaining.length, haveKey.length)) {
    // the two strings diverge before either one ends, so nothing below this
    // child is prefixed by the requested key
    return [];
  }
  // slice() yields "" when the requested prefix ends inside this edge
  return __suggest(haveChildNode, path + haveKey, remaining.slice(haveKey.length), maxSuggestions);
};

/*
 * Start of the tests/trie.js hunk that shared this line in the patch
 * (reflowed, text unchanged):
 *
 * diff --git a/tests/trie.js b/tests/trie.js
 * new file mode 100644
 * index 0000000..01ca3d5
 * --- /dev/null
 * +++ b/tests/trie.js
 * @@ -0,0 +1,32 @@
 * +import { pretty } from "../lib/util/pretty.js";
 * +import { insert, emptyTrie, growKey, suggest } from "../lib/util/trie.js";
 * +
 * +// insertion
 * +const with1Item = insert(emptyTrie)('abba')('dancing queen');
 * +console.log(pretty(with1Item));
 * +const with2Items = insert(with1Item)('aboriginal')('australia');
 */
// Continuation of the insertion walkthrough: every step derives a new trie
// from the previous one and prints it.
console.log(pretty(with2Items));

const with3Items = insert(with2Items)('food')('pizza');
console.log(pretty(with3Items));

const with4Items = insert(with3Items)('absent')('not here');
console.log(pretty(with4Items));

const with5Items = insert(with4Items)('000')('000');
console.log(pretty(with5Items));

const with6Items = insert(with5Items)('aboriginally')('??');
console.log(pretty(with6Items));

const with7Items = insert(with6Items)('ab')('yup');
console.log(pretty(with7Items));

const with8Items = insert(with7Items)('')('hi!');
console.log(pretty(with8Items));

// grow key (for auto-complete); expected output noted next to each input
const partialKeys = [
  "a",   // b
  "ab",  // (empty string)
  "abb", // a
  "f",   // ood
  "abo", // riginal
];
for (const partialKey of partialKeys) {
  console.log(growKey(with6Items)(partialKey));
}

// suggest (also for auto-complete); expected keys noted next to each limit
const suggestionLimits = [
  3, // 'ab', 'abba', 'aboriginal'
  4, // 'ab', 'abba', 'aboriginal', 'aboriginally'
  5, // 'ab', 'abba', 'aboriginal', 'aboriginally', 'absent'
];
for (const limit of suggestionLimits) {
  console.log(suggest(with8Items)("a")(limit));
}