Edit on GitHub

sqlglot.trie

 1import typing as t
 2from enum import Enum, auto
 3from collections.abc import Sequence, Iterable
 4
 5key = Sequence[t.Hashable]
 6
 7
 8class TrieResult(Enum):
 9    FAILED = auto()
10    PREFIX = auto()
11    EXISTS = auto()
12
13
14def new_trie(keywords: Iterable[key], trie: t.Optional[dict] = None) -> dict:
15    """
16    Creates a new trie out of a collection of keywords.
17
18    The trie is represented as a sequence of nested dictionaries keyed by either single
19    character strings, or by 0, which is used to designate that a keyword is in the trie.
20
21    Example:
22        >>> new_trie(["bla", "foo", "blab"])
23        {'b': {'l': {'a': {0: True, 'b': {0: True}}}}, 'f': {'o': {'o': {0: True}}}}
24
25    Args:
26        keywords: the keywords to create the trie from.
27        trie: a trie to mutate instead of creating a new one
28
29    Returns:
30        The trie corresponding to `keywords`.
31    """
32    trie = {} if trie is None else trie
33
34    for key in keywords:
35        current = trie
36        for char in key:
37            current = current.setdefault(char, {})
38
39        current[0] = True
40
41    return trie
42
43
44def in_trie(trie: dict, key: key) -> tuple[TrieResult, dict]:
45    """
46    Checks whether a key is in a trie.
47
48    Examples:
49        >>> in_trie(new_trie(["cat"]), "bob")
50        (<TrieResult.FAILED: 1>, {'c': {'a': {'t': {0: True}}}})
51
52        >>> in_trie(new_trie(["cat"]), "ca")
53        (<TrieResult.PREFIX: 2>, {'t': {0: True}})
54
55        >>> in_trie(new_trie(["cat"]), "cat")
56        (<TrieResult.EXISTS: 3>, {0: True})
57
58    Args:
59        trie: The trie to be searched.
60        key: The target key.
61
62    Returns:
63        A pair `(value, subtrie)`, where `subtrie` is the sub-trie we get at the point
64        where the search stops, and `value` is a TrieResult value that can be one of:
65
66        - TrieResult.FAILED: the search was unsuccessful
67        - TrieResult.PREFIX: `value` is a prefix of a keyword in `trie`
68        - TrieResult.EXISTS: `key` exists in `trie`
69    """
70    if not key:
71        return (TrieResult.FAILED, trie)
72
73    current = trie
74    for char in key:
75        if char not in current:
76            return (TrieResult.FAILED, current)
77        current = current[char]
78
79    if 0 in current:
80        return (TrieResult.EXISTS, current)
81
82    return (TrieResult.PREFIX, current)
key = collections.abc.Sequence[typing.Hashable]
class TrieResult(enum.Enum):
 9class TrieResult(Enum):
10    FAILED = auto()
11    PREFIX = auto()
12    EXISTS = auto()

An enumeration.

FAILED = <TrieResult.FAILED: 1>
PREFIX = <TrieResult.PREFIX: 2>
EXISTS = <TrieResult.EXISTS: 3>
def new_trie( keywords: Iterable[Sequence[typing.Hashable]], trie: Optional[dict] = None) -> dict:
15def new_trie(keywords: Iterable[key], trie: t.Optional[dict] = None) -> dict:
16    """
17    Creates a new trie out of a collection of keywords.
18
19    The trie is represented as a sequence of nested dictionaries keyed by either single
20    character strings, or by 0, which is used to designate that a keyword is in the trie.
21
22    Example:
23        >>> new_trie(["bla", "foo", "blab"])
24        {'b': {'l': {'a': {0: True, 'b': {0: True}}}}, 'f': {'o': {'o': {0: True}}}}
25
26    Args:
27        keywords: the keywords to create the trie from.
28        trie: a trie to mutate instead of creating a new one
29
30    Returns:
31        The trie corresponding to `keywords`.
32    """
33    trie = {} if trie is None else trie
34
35    for key in keywords:
36        current = trie
37        for char in key:
38            current = current.setdefault(char, {})
39
40        current[0] = True
41
42    return trie

Creates a new trie out of a collection of keywords.

The trie is represented as a sequence of nested dictionaries keyed by either single character strings, or by 0, which is used to designate that a keyword is in the trie.

Example:
>>> new_trie(["bla", "foo", "blab"])
{'b': {'l': {'a': {0: True, 'b': {0: True}}}}, 'f': {'o': {'o': {0: True}}}}
Arguments:
  • keywords: the keywords to create the trie from.
  • trie: a trie to mutate instead of creating a new one
Returns:

The trie corresponding to keywords.

def in_trie( trie: dict, key: Sequence[typing.Hashable]) -> tuple[TrieResult, dict]:
45def in_trie(trie: dict, key: key) -> tuple[TrieResult, dict]:
46    """
47    Checks whether a key is in a trie.
48
49    Examples:
50        >>> in_trie(new_trie(["cat"]), "bob")
51        (<TrieResult.FAILED: 1>, {'c': {'a': {'t': {0: True}}}})
52
53        >>> in_trie(new_trie(["cat"]), "ca")
54        (<TrieResult.PREFIX: 2>, {'t': {0: True}})
55
56        >>> in_trie(new_trie(["cat"]), "cat")
57        (<TrieResult.EXISTS: 3>, {0: True})
58
59    Args:
60        trie: The trie to be searched.
61        key: The target key.
62
63    Returns:
64        A pair `(value, subtrie)`, where `subtrie` is the sub-trie we get at the point
65        where the search stops, and `value` is a TrieResult value that can be one of:
66
67        - TrieResult.FAILED: the search was unsuccessful
68        - TrieResult.PREFIX: `value` is a prefix of a keyword in `trie`
69        - TrieResult.EXISTS: `key` exists in `trie`
70    """
71    if not key:
72        return (TrieResult.FAILED, trie)
73
74    current = trie
75    for char in key:
76        if char not in current:
77            return (TrieResult.FAILED, current)
78        current = current[char]
79
80    if 0 in current:
81        return (TrieResult.EXISTS, current)
82
83    return (TrieResult.PREFIX, current)

Checks whether a key is in a trie.

Examples:
>>> in_trie(new_trie(["cat"]), "bob")
(<TrieResult.FAILED: 1>, {'c': {'a': {'t': {0: True}}}})
>>> in_trie(new_trie(["cat"]), "ca")
(<TrieResult.PREFIX: 2>, {'t': {0: True}})
>>> in_trie(new_trie(["cat"]), "cat")
(<TrieResult.EXISTS: 3>, {0: True})
Arguments:
  • trie: The trie to be searched.
  • key: The target key.
Returns:

A pair (value, subtrie), where subtrie is the sub-trie we get at the point where the search stops, and value is a TrieResult value that can be one of: