
sqlglot.dialects.athena

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens
  6from sqlglot.dialects import Dialect, Hive, Trino
  7from sqlglot.tokens import TokenType, Token
  8
  9
 10class Athena(Dialect):
 11    """
 12    Over the years, it looks like AWS has taken various execution engines, bolted on AWS-specific
 13    modifications and then built the Athena service around them.
 14
 15    Thus, Athena is not simply hosted Trino; it's more like a router that routes SQL queries to an
 16    execution engine depending on the query type.
 17
 18    As of 2024-09-10, assuming your Athena workgroup is configured to use "Athena engine version 3",
 19    the following engines exist:
 20
 21    Hive:
 22     - Accepts mostly the same syntax as Hadoop / Hive
 23     - Uses backticks to quote identifiers
 24     - Has a distinctive DDL syntax (around things like setting table properties, storage locations, etc.)
 25       that is different from Trino
 26     - Used for *most* DDL (DDL that involves a SELECT query is routed to the Trino engine instead), e.g.:
 27        - CREATE [EXTERNAL] TABLE (without AS SELECT)
 28        - ALTER
 29        - DROP (except DROP VIEW)
 30
 31    Trino:
 32      - Uses double quotes to quote identifiers
 33      - Used for DDL operations that involve SELECT queries, e.g.:
 34        - CREATE VIEW / DROP VIEW
 35        - CREATE TABLE... AS SELECT
 36      - Used for DML operations
 37        - SELECT, INSERT, UPDATE, DELETE, MERGE
 38
 39    The SQLGlot Athena dialect tries to identify which engine a query would be routed to and then uses the
 40    tokenizer / parser / generator for that engine. This is unfortunately necessary, as there are certain
 41    incompatibilities between the engines' dialects that can't be handled by a single, unifying dialect.
 42
 43    References:
 44    - https://docs.aws.amazon.com/athena/latest/ug/ddl-reference.html
 45    - https://docs.aws.amazon.com/athena/latest/ug/dml-queries-functions-operators.html
 46    """
 47
 48    def __init__(self, **kwargs):
 49        super().__init__(**kwargs)
 50
 51        self._hive = Hive(**kwargs)
 52        self._trino = Trino(**kwargs)
 53
 54    def tokenize(self, sql: str, **opts) -> t.List[Token]:
 55        opts["hive"] = self._hive
 56        opts["trino"] = self._trino
 57        return super().tokenize(sql, **opts)
 58
 59    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
 60        opts["hive"] = self._hive
 61        opts["trino"] = self._trino
 62        return super().parse(sql, **opts)
 63
 64    def parse_into(
 65        self, expression_type: exp.IntoType, sql: str, **opts
 66    ) -> t.List[t.Optional[exp.Expression]]:
 67        opts["hive"] = self._hive
 68        opts["trino"] = self._trino
 69        return super().parse_into(expression_type, sql, **opts)
 70
 71    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
 72        opts["hive"] = self._hive
 73        opts["trino"] = self._trino
 74        return super().generate(expression, copy=copy, **opts)
 75
 76    # This Tokenizer consumes a combination of HiveQL and Trino SQL and then processes the tokens
 77    # to disambiguate which dialect actually needs to be used in order to tokenize correctly.
 78    class Tokenizer(tokens.Tokenizer):
 79        IDENTIFIERS = Trino.Tokenizer.IDENTIFIERS + Hive.Tokenizer.IDENTIFIERS
 80        STRING_ESCAPES = Trino.Tokenizer.STRING_ESCAPES + Hive.Tokenizer.STRING_ESCAPES
 81        HEX_STRINGS = Trino.Tokenizer.HEX_STRINGS + Hive.Tokenizer.HEX_STRINGS
 82        UNICODE_STRINGS = Trino.Tokenizer.UNICODE_STRINGS + Hive.Tokenizer.UNICODE_STRINGS
 83
 84        NUMERIC_LITERALS = {
 85            **Trino.Tokenizer.NUMERIC_LITERALS,
 86            **Hive.Tokenizer.NUMERIC_LITERALS,
 87        }
 88
 89        KEYWORDS = {
 90            **Hive.Tokenizer.KEYWORDS,
 91            **Trino.Tokenizer.KEYWORDS,
 92            "UNLOAD": TokenType.COMMAND,
 93        }
 94
 95        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
 96            hive = kwargs.pop("hive", None) or Hive()
 97            trino = kwargs.pop("trino", None) or Trino()
 98
 99            super().__init__(*args, **kwargs)
100
101            self._hive_tokenizer = hive.tokenizer(*args, **{**kwargs, "dialect": hive})
102            self._trino_tokenizer = _TrinoTokenizer(*args, **{**kwargs, "dialect": trino})
103
104        def tokenize(self, sql: str) -> t.List[Token]:
105            tokens = super().tokenize(sql)
106
107            if _tokenize_as_hive(tokens):
108                return [Token(TokenType.HIVE_TOKEN_STREAM, "")] + self._hive_tokenizer.tokenize(sql)
109
110            return self._trino_tokenizer.tokenize(sql)
111
112    class Parser(parser.Parser):
113        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
114            hive = kwargs.pop("hive", None) or Hive()
115            trino = kwargs.pop("trino", None) or Trino()
116
117            super().__init__(*args, **kwargs)
118
119            self._hive_parser = hive.parser(*args, **{**kwargs, "dialect": hive})
120            self._trino_parser = _TrinoParser(*args, **{**kwargs, "dialect": trino})
121
122        def parse(
123            self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
124        ) -> t.List[t.Optional[exp.Expression]]:
125            if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
126                return self._hive_parser.parse(raw_tokens[1:], sql)
127
128            return self._trino_parser.parse(raw_tokens, sql)
129
130        def parse_into(
131            self,
132            expression_types: exp.IntoType,
133            raw_tokens: t.List[Token],
134            sql: t.Optional[str] = None,
135        ) -> t.List[t.Optional[exp.Expression]]:
136            if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
137                return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
138
139            return self._trino_parser.parse_into(expression_types, raw_tokens, sql)
140
141    class Generator(generator.Generator):
142        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
143            hive = kwargs.pop("hive", None) or Hive()
144            trino = kwargs.pop("trino", None) or Trino()
145
146            super().__init__(*args, **kwargs)
147
148            self._hive_generator = _HiveGenerator(*args, **{**kwargs, "dialect": hive})
149            self._trino_generator = _TrinoGenerator(*args, **{**kwargs, "dialect": trino})
150
151        def generate(self, expression: exp.Expression, copy: bool = True) -> str:
152            if _generate_as_hive(expression):
153                generator = self._hive_generator
154            else:
155                generator = self._trino_generator
156
157            return generator.generate(expression, copy=copy)
158
159
160def _tokenize_as_hive(tokens: t.List[Token]) -> bool:
161    if len(tokens) < 2:
162        return False
163
164    first, second, *rest = tokens
165
166    first_type = first.token_type
167    first_text = first.text.upper()
168    second_type = second.token_type
169    second_text = second.text.upper()
170
171    if first_type in (TokenType.DESCRIBE, TokenType.SHOW) or first_text == "MSCK REPAIR":
172        return True
173
174    if first_type in (TokenType.ALTER, TokenType.CREATE, TokenType.DROP):
175        if second_text in ("DATABASE", "EXTERNAL", "SCHEMA"):
176            return True
177        if second_type == TokenType.VIEW:
178            return False
179
180        return all(t.token_type != TokenType.SELECT for t in rest)
181
182    return False
183
184
185def _generate_as_hive(expression: exp.Expression) -> bool:
186    if isinstance(expression, exp.Create):
187        if expression.kind == "TABLE":
188            properties = expression.args.get("properties")
189
190            # CREATE EXTERNAL TABLE is Hive
191            if properties and properties.find(exp.ExternalProperty):
192                return True
193
194            # Any CREATE TABLE other than CREATE TABLE ... AS <query> is Hive
195            if not isinstance(expression.expression, exp.Query):
196                return True
197        else:
198            # CREATE VIEW is Trino, but CREATE SCHEMA, CREATE DATABASE, etc, is Hive
199            return expression.kind != "VIEW"
200    elif isinstance(expression, (exp.Alter, exp.Drop, exp.Describe, exp.Show)):
201        if isinstance(expression, exp.Drop) and expression.kind == "VIEW":
202            # DROP VIEW is Trino, because CREATE VIEW is as well
203            return False
204
205        # Everything else, e.g., ALTER statements, is Hive
206        return True
207
208    return False
209
210
211def _is_iceberg_table(properties: exp.Properties) -> bool:
212    for p in properties.expressions:
213        if isinstance(p, exp.Property) and p.name == "table_type":
214            return p.text("value").lower() == "iceberg"
215
216    return False
217
218
219def _location_property_sql(self: Athena.Generator, e: exp.LocationProperty):
220    # If table_type='iceberg', the LocationProperty is called 'location'
221    # Otherwise, it's called 'external_location'
222    # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html
223
224    prop_name = "external_location"
225
226    if isinstance(e.parent, exp.Properties):
227        if _is_iceberg_table(e.parent):
228            prop_name = "location"
229
230    return f"{prop_name}={self.sql(e, 'this')}"
231
232
233def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByProperty) -> str:
234    # If table_type='iceberg' then the table property for partitioning is called 'partitioning'
235    # If table_type='hive' it's called 'partitioned_by'
236    # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
237
238    prop_name = "partitioned_by"
239
240    if isinstance(e.parent, exp.Properties):
241        if _is_iceberg_table(e.parent):
242            prop_name = "partitioning"
243
244    return f"{prop_name}={self.sql(e, 'this')}"
245
246
247# Athena extensions to Hive's generator
248class _HiveGenerator(Hive.Generator):
249    def alter_sql(self, expression: exp.Alter) -> str:
250        # Package any ALTER TABLE ADD actions into a Schema object, so it gets generated as
251        # `ALTER TABLE .. ADD COLUMNS(...)`, instead of `ALTER TABLE ... ADD COLUMN`, which
252        # is invalid syntax on Athena
253        if isinstance(expression, exp.Alter) and expression.kind == "TABLE":
254            if expression.actions and isinstance(expression.actions[0], exp.ColumnDef):
255                new_actions = exp.Schema(expressions=expression.actions)
256                expression.set("actions", [new_actions])
257
258        return super().alter_sql(expression)
259
260
261# Athena extensions to Trino's tokenizer
262class _TrinoTokenizer(Trino.Tokenizer):
263    KEYWORDS = {
264        **Trino.Tokenizer.KEYWORDS,
265        "UNLOAD": TokenType.COMMAND,
266    }
267
268
269# Athena extensions to Trino's parser
270class _TrinoParser(Trino.Parser):
271    STATEMENT_PARSERS = {
272        **Trino.Parser.STATEMENT_PARSERS,
273        TokenType.USING: lambda self: self._parse_as_command(self._prev),
274    }
275
276
277# Athena extensions to Trino's generator
278class _TrinoGenerator(Trino.Generator):
279    PROPERTIES_LOCATION = {
280        **Trino.Generator.PROPERTIES_LOCATION,
281        exp.LocationProperty: exp.Properties.Location.POST_WITH,
282    }
283
284    TRANSFORMS = {
285        **Trino.Generator.TRANSFORMS,
286        exp.PartitionedByProperty: _partitioned_by_property_sql,
287        exp.LocationProperty: _location_property_sql,
288    }
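
For reference, here is how a few statements come out when round-tripped through this dialect; the engine noted in each comment follows the _tokenize_as_hive / _generate_as_hive heuristics above (a sketch using the public sqlglot API; exact output may vary by version):

    import sqlglot

    for sql in (
        "SHOW TABLES",                      # Hive: DESCRIBE / SHOW statements
        "CREATE TABLE t (c INT)",           # Hive: non-CTAS DDL
        "CREATE TABLE t AS SELECT 1 AS c",  # Trino: DDL involving a SELECT
        "DROP VIEW v",                      # Trino: views are handled by Trino
        "SELECT 1",                         # Trino: DML
    ):
        print(sqlglot.transpile(sql, read="athena", write="athena")[0])
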
class Athena(sqlglot.dialects.dialect.Dialect):

Over the years, it looks like AWS has taken various execution engines, bolted on AWS-specific modifications and then built the Athena service around them.

Thus, Athena is not simply hosted Trino; it's more like a router that routes SQL queries to an execution engine depending on the query type.

As of 2024-09-10, assuming your Athena workgroup is configured to use "Athena engine version 3", the following engines exist:

Hive:
  • Accepts mostly the same syntax as Hadoop / Hive
  • Uses backticks to quote identifiers
  • Has a distinctive DDL syntax (around things like setting table properties, storage locations, etc.) that is different from Trino
  • Used for most DDL (DDL that involves a SELECT query is routed to the Trino engine instead), e.g.:
    • CREATE [EXTERNAL] TABLE (without AS SELECT)
    • ALTER
    • DROP (except DROP VIEW)

Trino:
  • Uses double quotes to quote identifiers
  • Used for DDL operations that involve SELECT queries, e.g.:
    • CREATE VIEW / DROP VIEW
    • CREATE TABLE... AS SELECT
  • Used for DML operations
    • SELECT, INSERT, UPDATE, DELETE, MERGE

The SQLGlot Athena dialect tries to identify which engine a query would be routed to and then uses the tokenizer / parser / generator for that engine. This is unfortunately necessary, as there are certain incompatibilities between the engines' dialects that can't be handled by a single, unifying dialect.
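
For example, the same dialect name yields engine-appropriate quoting depending on where a statement is routed (a minimal sketch using the public sqlglot API; exact output may vary by version):

    import sqlglot

    # Non-CTAS DDL is routed to the Hive engine, so identifiers keep backticks
    print(sqlglot.transpile("CREATE TABLE `t` (c INT)", read="athena", write="athena")[0])

    # DML is routed to the Trino engine, so identifiers keep double quotes
    print(sqlglot.transpile('SELECT "c" FROM "t"', read="athena", write="athena")[0])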

References:

  • https://docs.aws.amazon.com/athena/latest/ug/ddl-reference.html
  • https://docs.aws.amazon.com/athena/latest/ug/dml-queries-functions-operators.html

Athena(**kwargs)
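
Typical usage resolves the dialect by name rather than constructing the class directly; instantiating it also builds the inner Hive and Trino dialects it delegates to. A minimal sketch:

    import sqlglot

    # "athena" resolves to this Dialect subclass via sqlglot's dialect registry
    expression = sqlglot.parse_one("SELECT 1", read="athena")
    print(expression.sql(dialect="athena"))
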
def tokenize(self, sql: str, **opts) -> List[sqlglot.tokens.Token]:
def parse(self, sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
def parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
def generate( self, expression: sqlglot.expressions.Expression, copy: bool = True, **opts) -> str:
SUPPORTS_COLUMN_JOIN_MARKS = False

Whether the old-style outer join (+) syntax is supported.

UNESCAPED_SEQUENCES: Dict[str, str] = {'\\a': '\x07', '\\b': '\x08', '\\f': '\x0c', '\\n': '\n', '\\r': '\r', '\\t': '\t', '\\v': '\x0b', '\\\\': '\\'}

Mapping of an escaped sequence (e.g. \n) to its unescaped version (e.g. a literal newline character).
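
A quick illustration of the mapping above (a sketch; the attribute is inherited sqlglot dialect machinery):

    from sqlglot.dialects.athena import Athena

    # the two-character escape sequence backslash-n maps to a literal newline
    assert Athena.UNESCAPED_SEQUENCES["\\n"] == "\n"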

tokenizer_class = <class 'Athena.Tokenizer'>
jsonpath_tokenizer_class = <class 'sqlglot.tokens.JSONPathTokenizer'>
parser_class = <class 'Athena.Parser'>
generator_class = <class 'Athena.Generator'>
TIME_TRIE: Dict = {}
FORMAT_TRIE: Dict = {}
INVERSE_TIME_MAPPING: Dict[str, str] = {}
INVERSE_TIME_TRIE: Dict = {}
INVERSE_FORMAT_MAPPING: Dict[str, str] = {}
INVERSE_FORMAT_TRIE: Dict = {}
INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
ESCAPED_SEQUENCES: Dict[str, str] = {'\x07': '\\a', '\x08': '\\b', '\x0c': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t', '\x0b': '\\v', '\\': '\\\\'}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '"'
IDENTIFIER_END = '"'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = "x'"
HEX_END: Optional[str] = "'"
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = "U&'"
UNICODE_END: Optional[str] = "'"
class Athena.Tokenizer(sqlglot.tokens.Tokenizer):
Athena.Tokenizer(*args: Any, **kwargs: Any)
IDENTIFIERS = ['"', '`']
STRING_ESCAPES = ["'", '\\']
HEX_STRINGS = [("x'", "'"), ("X'", "'")]
UNICODE_STRINGS = [("U&'", "'"), ("u&'", "'")]
NUMERIC_LITERALS = {'L': 'BIGINT', 'S': 'SMALLINT', 'Y': 'TINYINT', 'D': 'DOUBLE', 'F': 'FLOAT', 'BD': 'DECIMAL'}
KEYWORDS = {'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '|>': <TokenType.PIPE_GT: 'PIPE_GT'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, '~~~': <TokenType.GLOB: 'GLOB'>, '~~': <TokenType.LIKE: 'LIKE'>, '~~*': <TokenType.ILIKE: 'ILIKE'>, '~*': <TokenType.IRLIKE: 'IRLIKE'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'COPY': <TokenType.COPY: 'COPY'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_SCHEMA': <TokenType.CURRENT_SCHEMA: 'CURRENT_SCHEMA'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ENUM': <TokenType.ENUM: 'ENUM'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': 
<TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NAMESPACE': <TokenType.NAMESPACE: 'NAMESPACE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'RENAME': <TokenType.RENAME: 'RENAME'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.STRUCT: 'STRUCT'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'STRAIGHT_JOIN': <TokenType.STRAIGHT_JOIN: 'STRAIGHT_JOIN'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': 
<TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'TRUNCATE': <TokenType.TRUNCATE: 'TRUNCATE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'UHUGEINT': <TokenType.UINT128: 'UINT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT256': <TokenType.INT256: 'INT256'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'UINT': <TokenType.UINT: 'UINT'>, 'UINT128': <TokenType.UINT128: 'UINT128'>, 'UINT256': <TokenType.UINT256: 'UINT256'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL32': <TokenType.DECIMAL32: 'DECIMAL32'>, 'DECIMAL64': <TokenType.DECIMAL64: 'DECIMAL64'>, 'DECIMAL128': <TokenType.DECIMAL128: 'DECIMAL128'>, 'DECIMAL256': <TokenType.DECIMAL256: 'DECIMAL256'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'LIST': <TokenType.LIST: 'LIST'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'JSONB': <TokenType.JSONB: 'JSONB'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'CHAR VARYING': <TokenType.VARCHAR: 'VARCHAR'>, 'CHARACTER VARYING': <TokenType.VARCHAR: 'VARCHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'BPCHAR': <TokenType.BPCHAR: 'BPCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 
'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'TIMESTAMP_LTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'TIMESTAMPNTZ': <TokenType.TIMESTAMPNTZ: 'TIMESTAMPNTZ'>, 'TIMESTAMP_NTZ': <TokenType.TIMESTAMPNTZ: 'TIMESTAMPNTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'VECTOR': <TokenType.VECTOR: 'VECTOR'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'SEQUENCE': <TokenType.SEQUENCE: 'SEQUENCE'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.ANALYZE: 'ANALYZE'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.GRANT: 'GRANT'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MINUS': <TokenType.EXCEPT: 'EXCEPT'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>, 'DEALLOCATE PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'DESCRIBE INPUT': <TokenType.COMMAND: 'COMMAND'>, 'DESCRIBE OUTPUT': <TokenType.COMMAND: 'COMMAND'>, 'RESET SESSION': <TokenType.COMMAND: 'COMMAND'>, 'START': <TokenType.BEGIN: 'BEGIN'>, 'MATCH_RECOGNIZE': <TokenType.MATCH_RECOGNIZE: 'MATCH_RECOGNIZE'>, 'IPADDRESS': <TokenType.IPADDRESS: 'IPADDRESS'>, 'IPPREFIX': <TokenType.IPPREFIX: 'IPPREFIX'>, 'TDIGEST': <TokenType.TDIGEST: 'TDIGEST'>, 'HYPERLOGLOG': <TokenType.HLLSKETCH: 'HLLSKETCH'>, 'UNLOAD': <TokenType.COMMAND: 'COMMAND'>}
def tokenize(self, sql: str) -> List[sqlglot.tokens.Token]:

Returns a list of tokens corresponding to the SQL string sql.
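
A minimal sketch, going through the dialect-level tokenize wired up in the source above (the leading marker token is an internal routing detail):

    from sqlglot.dialects.athena import Athena

    tokens = Athena().tokenize("SHOW TABLES")
    print(tokens[0].token_type)  # TokenType.HIVE_TOKEN_STREAM: routed to the Hive tokenizer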

class Athena.Parser(sqlglot.parser.Parser):

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
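
For example, a parser configured to raise immediately with at most one collected error (a sketch; ErrorLevel lives in sqlglot.errors):

    from sqlglot.errors import ErrorLevel
    from sqlglot.dialects.athena import Athena

    parser = Athena().parser(error_level=ErrorLevel.RAISE, max_errors=1)
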
Athena.Parser(*args: Any, **kwargs: Any)
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: The list of tokens.
  • sql: The original SQL string, used to produce helpful debug messages.
Returns:

The list of the produced syntax trees.
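
A minimal sketch exercising this method via the dialect's tokenizer (hypothetical identifiers):

    from sqlglot.dialects.athena import Athena

    dialect = Athena()
    sql = 'SELECT "c" FROM "t"'
    trees = dialect.parser().parse(dialect.tokenize(sql), sql)
    print(trees[0].sql(dialect="athena"))  # SELECT "c" FROM "t"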

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: The expression type(s) to try and parse the token list into.
  • raw_tokens: The list of tokens.
  • sql: The original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.
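
A sketch of parsing straight into an expected expression type, via the dialect-level wrapper shown earlier (hypothetical table and column names):

    from sqlglot import exp
    from sqlglot.dialects.athena import Athena

    select = Athena().parse_into(exp.Select, "SELECT c FROM t")[0]
    assert isinstance(select, exp.Select)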

ID_VAR_TOKENS = {<TokenType.HSTORE: 'HSTORE'>, <TokenType.IDENTIFIER: 'IDENTIFIER'>, <TokenType.DEFAULT: 'DEFAULT'>, <TokenType.SHOW: 'SHOW'>, <TokenType.SIMPLEAGGREGATEFUNCTION: 'SIMPLEAGGREGATEFUNCTION'>, <TokenType.COMMAND: 'COMMAND'>, <TokenType.FIRST: 'FIRST'>, <TokenType.KILL: 'KILL'>, <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, <TokenType.WINDOW: 'WINDOW'>, <TokenType.TABLE: 'TABLE'>, <TokenType.INT128: 'INT128'>, <TokenType.TRUE: 'TRUE'>, <TokenType.UTINYINT: 'UTINYINT'>, <TokenType.DECIMAL128: 'DECIMAL128'>, <TokenType.UINT128: 'UINT128'>, <TokenType.TIMESTAMP_MS: 'TIMESTAMP_MS'>, <TokenType.ASC: 'ASC'>, <TokenType.FALSE: 'FALSE'>, <TokenType.OVERWRITE: 'OVERWRITE'>, <TokenType.ENUM: 'ENUM'>, <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, <TokenType.FIXEDSTRING: 'FIXEDSTRING'>, <TokenType.ARRAY: 'ARRAY'>, <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, <TokenType.MERGE: 'MERGE'>, <TokenType.LINESTRING: 'LINESTRING'>, <TokenType.JSON: 'JSON'>, <TokenType.IS: 'IS'>, <TokenType.DECIMAL32: 'DECIMAL32'>, <TokenType.DECIMAL64: 'DECIMAL64'>, <TokenType.FLOAT: 'FLOAT'>, <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, <TokenType.SCHEMA: 'SCHEMA'>, <TokenType.RENAME: 'RENAME'>, <TokenType.DOUBLE: 'DOUBLE'>, <TokenType.STORAGE_INTEGRATION: 'STORAGE_INTEGRATION'>, <TokenType.HLLSKETCH: 'HLLSKETCH'>, <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, <TokenType.TDIGEST: 'TDIGEST'>, <TokenType.PUT: 'PUT'>, <TokenType.YEAR: 'YEAR'>, <TokenType.PRAGMA: 'PRAGMA'>, <TokenType.SERIAL: 'SERIAL'>, <TokenType.MEDIUMINT: 'MEDIUMINT'>, <TokenType.TINYBLOB: 'TINYBLOB'>, <TokenType.ASOF: 'ASOF'>, <TokenType.TOP: 'TOP'>, <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, <TokenType.UDECIMAL: 'UDECIMAL'>, <TokenType.COMMENT: 'COMMENT'>, <TokenType.INET: 'INET'>, <TokenType.MAP: 'MAP'>, <TokenType.GET: 'GET'>, <TokenType.CACHE: 'CACHE'>, <TokenType.PERCENT: 'PERCENT'>, <TokenType.DATETIME2: 'DATETIME2'>, <TokenType.OBJECT_IDENTIFIER: 'OBJECT_IDENTIFIER'>, <TokenType.UNIQUE: 'UNIQUE'>, <TokenType.DATE: 'DATE'>, <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, <TokenType.PIVOT: 'PIVOT'>, <TokenType.INTERVAL: 'INTERVAL'>, <TokenType.SMALLINT: 'SMALLINT'>, <TokenType.FINAL: 'FINAL'>, <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, <TokenType.TSTZRANGE: 'TSTZRANGE'>, <TokenType.LOWCARDINALITY: 'LOWCARDINALITY'>, <TokenType.VOLATILE: 'VOLATILE'>, <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, <TokenType.POLYGON: 'POLYGON'>, <TokenType.DESC: 'DESC'>, <TokenType.CURRENT_USER: 'CURRENT_USER'>, <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, <TokenType.NULL: 'NULL'>, <TokenType.FORMAT: 'FORMAT'>, <TokenType.UNNEST: 'UNNEST'>, <TokenType.UPDATE: 'UPDATE'>, <TokenType.UNPIVOT: 'UNPIVOT'>, <TokenType.LONGBLOB: 'LONGBLOB'>, <TokenType.FILTER: 'FILTER'>, <TokenType.SETTINGS: 'SETTINGS'>, <TokenType.TINYINT: 'TINYINT'>, <TokenType.SEMI: 'SEMI'>, <TokenType.SINK: 'SINK'>, <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, <TokenType.INT4RANGE: 'INT4RANGE'>, <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, <TokenType.DIV: 'DIV'>, <TokenType.PSEUDO_TYPE: 'PSEUDO_TYPE'>, <TokenType.POINT: 'POINT'>, <TokenType.TEXT: 'TEXT'>, <TokenType.VECTOR: 'VECTOR'>, <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, <TokenType.BIT: 'BIT'>, <TokenType.TIMESTAMP_S: 'TIMESTAMP_S'>, <TokenType.MULTILINESTRING: 'MULTILINESTRING'>, <TokenType.BLOB: 'BLOB'>, <TokenType.NESTED: 'NESTED'>, <TokenType.VARCHAR: 'VARCHAR'>, <TokenType.COPY: 'COPY'>, <TokenType.SOME: 'SOME'>, <TokenType.TAG: 'TAG'>, <TokenType.INT256: 'INT256'>, <TokenType.ALL: 'ALL'>, <TokenType.DETACH: 'DETACH'>, <TokenType.IPPREFIX: 'IPPREFIX'>, <TokenType.LEFT: 
'LEFT'>, <TokenType.NOTHING: 'NOTHING'>, <TokenType.DATERANGE: 'DATERANGE'>, <TokenType.CUBE: 'CUBE'>, <TokenType.USMALLINT: 'USMALLINT'>, <TokenType.ISNULL: 'ISNULL'>, <TokenType.DECIMAL256: 'DECIMAL256'>, <TokenType.DECIMAL: 'DECIMAL'>, <TokenType.TIMESTAMP: 'TIMESTAMP'>, <TokenType.REFERENCES: 'REFERENCES'>, <TokenType.DICTIONARY: 'DICTIONARY'>, <TokenType.UINT256: 'UINT256'>, <TokenType.SMALLSERIAL: 'SMALLSERIAL'>, <TokenType.LOAD: 'LOAD'>, <TokenType.ANY: 'ANY'>, <TokenType.ENUM16: 'ENUM16'>, <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, <TokenType.BPCHAR: 'BPCHAR'>, <TokenType.RIGHT: 'RIGHT'>, <TokenType.DYNAMIC: 'DYNAMIC'>, <TokenType.COLUMN: 'COLUMN'>, <TokenType.ROW: 'ROW'>, <TokenType.ATTACH: 'ATTACH'>, <TokenType.ROWS: 'ROWS'>, <TokenType.TINYTEXT: 'TINYTEXT'>, <TokenType.VAR: 'VAR'>, <TokenType.IPADDRESS: 'IPADDRESS'>, <TokenType.IMAGE: 'IMAGE'>, <TokenType.REFRESH: 'REFRESH'>, <TokenType.USERDEFINED: 'USERDEFINED'>, <TokenType.ENUM8: 'ENUM8'>, <TokenType.MONEY: 'MONEY'>, <TokenType.OPERATOR: 'OPERATOR'>, <TokenType.UBIGINT: 'UBIGINT'>, <TokenType.BIGINT: 'BIGINT'>, <TokenType.TIME: 'TIME'>, <TokenType.DATETIME64: 'DATETIME64'>, <TokenType.DATE32: 'DATE32'>, <TokenType.VOID: 'VOID'>, <TokenType.UDOUBLE: 'UDOUBLE'>, <TokenType.TIMESTAMPNTZ: 'TIMESTAMPNTZ'>, <TokenType.ROWVERSION: 'ROWVERSION'>, <TokenType.COMMIT: 'COMMIT'>, <TokenType.SOURCE: 'SOURCE'>, <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, <TokenType.END: 'END'>, <TokenType.CONSTRAINT: 'CONSTRAINT'>, <TokenType.NULLABLE: 'NULLABLE'>, <TokenType.UMEDIUMINT: 'UMEDIUMINT'>, <TokenType.TRUNCATE: 'TRUNCATE'>, <TokenType.MODEL: 'MODEL'>, <TokenType.DESCRIBE: 'DESCRIBE'>, <TokenType.OVERLAPS: 'OVERLAPS'>, <TokenType.USE: 'USE'>, <TokenType.TSRANGE: 'TSRANGE'>, <TokenType.SEQUENCE: 'SEQUENCE'>, <TokenType.SMALLDATETIME: 'SMALLDATETIME'>, <TokenType.DELETE: 'DELETE'>, <TokenType.EXISTS: 'EXISTS'>, <TokenType.RING: 'RING'>, <TokenType.XML: 'XML'>, <TokenType.IPV4: 'IPV4'>, <TokenType.WAREHOUSE: 'WAREHOUSE'>, <TokenType.STREAMLIT: 'STREAMLIT'>, <TokenType.LIMIT: 'LIMIT'>, <TokenType.STRAIGHT_JOIN: 'STRAIGHT_JOIN'>, <TokenType.APPLY: 'APPLY'>, <TokenType.STAGE: 'STAGE'>, <TokenType.SUPER: 'SUPER'>, <TokenType.EXPORT: 'EXPORT'>, <TokenType.DATETIME: 'DATETIME'>, <TokenType.RANGE: 'RANGE'>, <TokenType.INDEX: 'INDEX'>, <TokenType.DATABASE: 'DATABASE'>, <TokenType.CURRENT_DATETIME: 'CURRENT_DATETIME'>, <TokenType.BIGSERIAL: 'BIGSERIAL'>, <TokenType.ESCAPE: 'ESCAPE'>, <TokenType.VARIANT: 'VARIANT'>, <TokenType.SET: 'SET'>, <TokenType.LIST: 'LIST'>, <TokenType.CASE: 'CASE'>, <TokenType.COLLATE: 'COLLATE'>, <TokenType.INT8RANGE: 'INT8RANGE'>, <TokenType.NATURAL: 'NATURAL'>, <TokenType.EXECUTE: 'EXECUTE'>, <TokenType.PARTITION: 'PARTITION'>, <TokenType.AGGREGATEFUNCTION: 'AGGREGATEFUNCTION'>, <TokenType.UUID: 'UUID'>, <TokenType.NEXT: 'NEXT'>, <TokenType.FUNCTION: 'FUNCTION'>, <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, <TokenType.NVARCHAR: 'NVARCHAR'>, <TokenType.UINT: 'UINT'>, <TokenType.NAME: 'NAME'>, <TokenType.ANTI: 'ANTI'>, <TokenType.ROLLUP: 'ROLLUP'>, <TokenType.VARBINARY: 'VARBINARY'>, <TokenType.OFFSET: 'OFFSET'>, <TokenType.UNKNOWN: 'UNKNOWN'>, <TokenType.NAMESPACE: 'NAMESPACE'>, <TokenType.TIMETZ: 'TIMETZ'>, <TokenType.STRUCT: 'STRUCT'>, <TokenType.REPLACE: 'REPLACE'>, <TokenType.BINARY: 'BINARY'>, <TokenType.TEMPORARY: 'TEMPORARY'>, <TokenType.BOOLEAN: 'BOOLEAN'>, <TokenType.SMALLMONEY: 'SMALLMONEY'>, <TokenType.RECURSIVE: 'RECURSIVE'>, <TokenType.FULL: 'FULL'>, <TokenType.INT: 'INT'>, <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 
<TokenType.NUMRANGE: 'NUMRANGE'>, <TokenType.CURRENT_SCHEMA: 'CURRENT_SCHEMA'>, <TokenType.LONGTEXT: 'LONGTEXT'>, <TokenType.IPV6: 'IPV6'>, <TokenType.CHAR: 'CHAR'>, <TokenType.KEEP: 'KEEP'>, <TokenType.MULTIPOLYGON: 'MULTIPOLYGON'>, <TokenType.JSONB: 'JSONB'>, <TokenType.PROCEDURE: 'PROCEDURE'>, <TokenType.GEOMETRY: 'GEOMETRY'>, <TokenType.FILE_FORMAT: 'FILE_FORMAT'>, <TokenType.BEGIN: 'BEGIN'>, <TokenType.TIMESTAMP_NS: 'TIMESTAMP_NS'>, <TokenType.OBJECT: 'OBJECT'>, <TokenType.ORDINALITY: 'ORDINALITY'>, <TokenType.VIEW: 'VIEW'>, <TokenType.NCHAR: 'NCHAR'>}
TABLE_ALIAS_TOKENS = {
    TokenType.HSTORE, TokenType.IDENTIFIER, TokenType.DEFAULT, TokenType.SHOW, TokenType.SIMPLEAGGREGATEFUNCTION, TokenType.COMMAND,
    TokenType.FIRST, TokenType.KILL, TokenType.TIMESTAMPLTZ, TokenType.TABLE, TokenType.INT128, TokenType.TRUE,
    TokenType.UTINYINT, TokenType.DECIMAL128, TokenType.UINT128, TokenType.TIMESTAMP_MS, TokenType.ASC, TokenType.FALSE,
    TokenType.OVERWRITE, TokenType.ENUM, TokenType.DATEMULTIRANGE, TokenType.FIXEDSTRING, TokenType.ARRAY, TokenType.AUTO_INCREMENT,
    TokenType.MERGE, TokenType.LINESTRING, TokenType.JSON, TokenType.IS, TokenType.DECIMAL32, TokenType.DECIMAL64,
    TokenType.FLOAT, TokenType.NUMMULTIRANGE, TokenType.SCHEMA, TokenType.RENAME, TokenType.DOUBLE, TokenType.STORAGE_INTEGRATION,
    TokenType.HLLSKETCH, TokenType.BIGDECIMAL, TokenType.TDIGEST, TokenType.PUT, TokenType.YEAR, TokenType.PRAGMA,
    TokenType.SERIAL, TokenType.MEDIUMINT, TokenType.TINYBLOB, TokenType.TOP, TokenType.INT8MULTIRANGE, TokenType.UDECIMAL,
    TokenType.COMMENT, TokenType.INET, TokenType.MAP, TokenType.GET, TokenType.CACHE, TokenType.PERCENT,
    TokenType.DATETIME2, TokenType.OBJECT_IDENTIFIER, TokenType.UNIQUE, TokenType.DATE, TokenType.INT4MULTIRANGE, TokenType.PIVOT,
    TokenType.INTERVAL, TokenType.SMALLINT, TokenType.FINAL, TokenType.TSMULTIRANGE, TokenType.TSTZRANGE, TokenType.LOWCARDINALITY,
    TokenType.VOLATILE, TokenType.GEOGRAPHY, TokenType.POLYGON, TokenType.DESC, TokenType.CURRENT_USER, TokenType.CURRENT_TIME,
    TokenType.NULL, TokenType.FORMAT, TokenType.UNNEST, TokenType.UPDATE, TokenType.UNPIVOT, TokenType.LONGBLOB,
    TokenType.FILTER, TokenType.SETTINGS, TokenType.TINYINT, TokenType.SINK, TokenType.TIMESTAMPTZ, TokenType.INT4RANGE,
    TokenType.MEDIUMTEXT, TokenType.PSEUDO_TYPE, TokenType.DIV, TokenType.POINT, TokenType.TEXT, TokenType.VECTOR,
    TokenType.CURRENT_DATE, TokenType.BIT, TokenType.TIMESTAMP_S, TokenType.MULTILINESTRING, TokenType.BLOB, TokenType.NESTED,
    TokenType.VARCHAR, TokenType.COPY, TokenType.SOME, TokenType.TAG, TokenType.INT256, TokenType.ALL,
    TokenType.DETACH, TokenType.IPPREFIX, TokenType.NOTHING, TokenType.DATERANGE, TokenType.CUBE,
    TokenType.USMALLINT, TokenType.ISNULL, TokenType.DECIMAL256, TokenType.DECIMAL, TokenType.TIMESTAMP, TokenType.REFERENCES,
    TokenType.DICTIONARY, TokenType.UINT256, TokenType.SMALLSERIAL, TokenType.LOAD, TokenType.ANY, TokenType.ENUM16,
    TokenType.MEDIUMBLOB, TokenType.BPCHAR, TokenType.DYNAMIC, TokenType.COLUMN, TokenType.ROW, TokenType.ATTACH,
    TokenType.ROWS, TokenType.TINYTEXT, TokenType.VAR, TokenType.IPADDRESS, TokenType.IMAGE, TokenType.REFRESH,
    TokenType.USERDEFINED, TokenType.ENUM8, TokenType.MONEY, TokenType.OPERATOR, TokenType.UBIGINT, TokenType.BIGINT,
    TokenType.TIME, TokenType.DATETIME64, TokenType.DATE32, TokenType.VOID, TokenType.UDOUBLE, TokenType.TIMESTAMPNTZ,
    TokenType.ROWVERSION, TokenType.COMMIT, TokenType.SOURCE, TokenType.CURRENT_TIMESTAMP, TokenType.END, TokenType.CONSTRAINT,
    TokenType.NULLABLE, TokenType.UMEDIUMINT, TokenType.TRUNCATE, TokenType.MODEL, TokenType.DESCRIBE, TokenType.OVERLAPS,
    TokenType.USE, TokenType.TSRANGE, TokenType.SEQUENCE, TokenType.SMALLDATETIME, TokenType.DELETE, TokenType.EXISTS,
    TokenType.RING, TokenType.XML, TokenType.IPV4, TokenType.WAREHOUSE, TokenType.STREAMLIT, TokenType.LIMIT,
    TokenType.STRAIGHT_JOIN, TokenType.STAGE, TokenType.SUPER, TokenType.EXPORT, TokenType.DATETIME, TokenType.RANGE,
    TokenType.INDEX, TokenType.DATABASE, TokenType.CURRENT_DATETIME, TokenType.BIGSERIAL, TokenType.ESCAPE, TokenType.VARIANT,
    TokenType.SET, TokenType.LIST, TokenType.CASE, TokenType.COLLATE, TokenType.INT8RANGE, TokenType.EXECUTE,
    TokenType.PARTITION, TokenType.AGGREGATEFUNCTION, TokenType.UUID, TokenType.NEXT, TokenType.FUNCTION, TokenType.FOREIGN_KEY,
    TokenType.NVARCHAR, TokenType.UINT, TokenType.NAME, TokenType.ROLLUP, TokenType.VARBINARY, TokenType.OFFSET,
    TokenType.UNKNOWN, TokenType.NAMESPACE, TokenType.TIMETZ, TokenType.STRUCT, TokenType.REPLACE, TokenType.BINARY,
    TokenType.TEMPORARY, TokenType.BOOLEAN, TokenType.SMALLMONEY, TokenType.RECURSIVE, TokenType.INT, TokenType.TSTZMULTIRANGE,
    TokenType.NUMRANGE, TokenType.CURRENT_SCHEMA, TokenType.LONGTEXT, TokenType.IPV6, TokenType.CHAR, TokenType.KEEP,
    TokenType.MULTIPOLYGON, TokenType.JSONB, TokenType.PROCEDURE, TokenType.GEOMETRY, TokenType.FILE_FORMAT, TokenType.BEGIN,
    TokenType.TIMESTAMP_NS, TokenType.OBJECT, TokenType.ORDINALITY, TokenType.VIEW, TokenType.NCHAR}
SHOW_TRIE: Dict = {}
SET_TRIE: Dict = {'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
Inherited Members
sqlglot.parser.Parser
FUNCTIONS
NO_PAREN_FUNCTIONS
STRUCT_TYPE_TOKENS
NESTED_TYPE_TOKENS
ENUM_TYPE_TOKENS
AGGREGATE_TYPE_TOKENS
TYPE_TOKENS
SIGNED_TO_UNSIGNED_TYPE_TOKEN
SUBQUERY_PREDICATES
RESERVED_TOKENS
DB_CREATABLES
CREATABLES
ALTERABLES
ALIAS_TOKENS
COLON_PLACEHOLDER_TOKENS
ARRAY_CONSTRUCTORS
COMMENT_TABLE_ALIAS_TOKENS
UPDATE_ALIAS_TOKENS
TRIM_TYPES
FUNC_TOKENS
CONJUNCTION
ASSIGNMENT
DISJUNCTION
EQUALITY
COMPARISON
BITWISE
TERM
FACTOR
EXPONENT
TIMES
TIMESTAMPS
SET_OPERATIONS
JOIN_METHODS
JOIN_SIDES
JOIN_KINDS
JOIN_HINTS
LAMBDAS
COLUMN_OPERATORS
EXPRESSION_PARSERS
STATEMENT_PARSERS
UNARY_PARSERS
STRING_PARSERS
NUMERIC_PARSERS
PRIMARY_PARSERS
PLACEHOLDER_PARSERS
RANGE_PARSERS
PIPE_SYNTAX_TRANSFORM_PARSERS
PROPERTY_PARSERS
CONSTRAINT_PARSERS
ALTER_PARSERS
ALTER_ALTER_PARSERS
SCHEMA_UNNAMED_CONSTRAINTS
NO_PAREN_FUNCTION_PARSERS
INVALID_FUNC_NAME_TOKENS
FUNCTIONS_WITH_ALIASED_ARGS
KEY_VALUE_DEFINITIONS
FUNCTION_PARSERS
QUERY_MODIFIER_PARSERS
QUERY_MODIFIER_TOKENS
SET_PARSERS
SHOW_PARSERS
TYPE_LITERAL_PARSERS
TYPE_CONVERTERS
DDL_SELECT_TOKENS
PRE_VOLATILE_TOKENS
TRANSACTION_KIND
TRANSACTION_CHARACTERISTICS
CONFLICT_ACTIONS
CREATE_SEQUENCE
ISOLATED_LOADING_OPTIONS
USABLES
CAST_ACTIONS
SCHEMA_BINDING_OPTIONS
PROCEDURE_OPTIONS
EXECUTE_AS_OPTIONS
KEY_CONSTRAINT_OPTIONS
WINDOW_EXCLUDE_OPTIONS
INSERT_ALTERNATIVES
CLONE_KEYWORDS
HISTORICAL_DATA_PREFIX
HISTORICAL_DATA_KIND
OPCLASS_FOLLOW_KEYWORDS
OPTYPE_FOLLOW_TOKENS
TABLE_INDEX_HINT_TOKENS
VIEW_ATTRIBUTES
WINDOW_ALIAS_TOKENS
WINDOW_BEFORE_PAREN_TOKENS
WINDOW_SIDES
JSON_KEY_VALUE_SEPARATOR_TOKENS
FETCH_TOKENS
ADD_CONSTRAINT_TOKENS
DISTINCT_TOKENS
NULL_TOKENS
UNNEST_OFFSET_ALIAS_TOKENS
SELECT_START_TOKENS
COPY_INTO_VARLEN_OPTIONS
IS_JSON_PREDICATE_KIND
ODBC_DATETIME_LITERALS
ON_CONDITION_TOKENS
PRIVILEGE_FOLLOW_TOKENS
DESCRIBE_STYLES
ANALYZE_STYLES
ANALYZE_EXPRESSION_PARSERS
PARTITION_KEYWORDS
AMBIGUOUS_ALIAS_TOKENS
OPERATION_MODIFIERS
RECURSIVE_CTE_SEARCH_KIND
MODIFIABLES
STRICT_CAST
PREFIXED_PIVOT_COLUMNS
IDENTIFY_PIVOT_STRINGS
LOG_DEFAULTS_TO_LN
TABLESAMPLE_CSV
DEFAULT_SAMPLING_METHOD
SET_REQUIRES_ASSIGNMENT_DELIMITER
TRIM_PATTERN_FIRST
STRING_ALIASES
MODIFIERS_ATTACHED_TO_SET_OP
SET_OP_MODIFIERS
NO_PAREN_IF_COMMANDS
JSON_ARROWS_REQUIRE_JSON_TYPE
COLON_IS_VARIANT_EXTRACT
VALUES_FOLLOWED_BY_PAREN
SUPPORTS_IMPLICIT_UNNEST
INTERVAL_SPANS
SUPPORTS_PARTITION_SELECTION
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
OPTIONAL_ALIAS_TOKEN_CTE
ALTER_RENAME_REQUIRES_COLUMN
JOINS_HAVE_EQUAL_PRECEDENCE
ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
error_level
error_message_context
max_errors
dialect
reset
check_errors
raise_error
expression
validate_expression
parse_set_operation
errors
sql
class Athena.Generator(sqlglot.generator.Generator):
142    class Generator(generator.Generator):
143        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
144            hive = kwargs.pop("hive", None) or Hive()
145            trino = kwargs.pop("trino", None) or Trino()
146
147            super().__init__(*args, **kwargs)
148
149            self._hive_generator = _HiveGenerator(*args, **{**kwargs, "dialect": hive})
150            self._trino_generator = _TrinoGenerator(*args, **{**kwargs, "dialect": trino})
151
152        def generate(self, expression: exp.Expression, copy: bool = True) -> str:
153            if _generate_as_hive(expression):
154                generator = self._hive_generator
155            else:
156                generator = self._trino_generator
157
158            return generator.generate(expression, copy=copy)

Generator converts a given syntax tree to the corresponding SQL string.

Arguments:
  • pretty: Whether to format the produced SQL string. Default: False.
  • identify: Determines when an identifier should be quoted. Possible values are:
      • False (default): Never quote, except in cases where it's mandatory by the dialect.
      • True or 'always': Always quote.
      • 'safe': Only quote identifiers that are case insensitive.
  • normalize: Whether to normalize identifiers to lowercase. Default: False.
  • pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
  • indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
  • normalize_functions: How to normalize function names. Possible values are:
      • "upper" or True (default): Convert names to uppercase.
      • "lower": Convert names to lowercase.
      • False: Disables function name normalization.
  • unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
  • max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
  • leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
  • comments: Whether to preserve comments in the output SQL code. Default: True.
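
In practice these options are usually forwarded through the top-level API rather than by constructing a Generator directly. A minimal sketch, assuming a made-up table my_table:

    import sqlglot

    # Generator options such as pretty and identify are forwarded by
    # sqlglot.transpile to the write dialect's generator.
    sql = "select id, ds from my_table where ds = '2024-09-10'"

    print(
        sqlglot.transpile(
            sql,
            read="athena",
            write="athena",
            pretty=True,    # format the produced SQL
            identify=True,  # always quote identifiers
        )[0]
    )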
Athena.Generator(*args: Any, **kwargs: Any)
143        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
144            hive = kwargs.pop("hive", None) or Hive()
145            trino = kwargs.pop("trino", None) or Trino()
146
147            super().__init__(*args, **kwargs)
148
149            self._hive_generator = _HiveGenerator(*args, **{**kwargs, "dialect": hive})
150            self._trino_generator = _TrinoGenerator(*args, **{**kwargs, "dialect": trino})
def generate(self, expression: sqlglot.expressions.Expression, copy: bool = True) -> str:
152        def generate(self, expression: exp.Expression, copy: bool = True) -> str:
153            if _generate_as_hive(expression):
154                generator = self._hive_generator
155            else:
156                generator = self._trino_generator
157
158            return generator.generate(expression, copy=copy)

Generates the SQL string corresponding to the given syntax tree.

Arguments:
  • expression: The syntax tree.
  • copy: Whether to copy the expression. The generator performs mutations, so it is safer to copy.
Returns:
  The SQL string corresponding to expression.
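
As a rough illustration of the routing done by _generate_as_hive (the table name my_table is made up, and the quoting noted in the comments is the expected behavior, since the Hive engine uses backticks and the Trino engine uses double quotes):

    import sqlglot

    # Both statements are parsed with the Athena dialect.
    ddl = sqlglot.parse_one("DROP TABLE my_table", read="athena")
    query = sqlglot.parse_one("SELECT x FROM my_table", read="athena")

    # Generation picks an engine per expression, visible in the quoting:
    print(ddl.sql(dialect="athena", identify=True))    # expected: `my_table` (Hive)
    print(query.sql(dialect="athena", identify=True))  # expected: "x", "my_table" (Trino)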

SELECT_KINDS: Tuple[str, ...] = ()
SUPPORTS_DECODE_CASE = False
AFTER_HAVING_MODIFIER_TRANSFORMS = {'windows': <function Generator.<lambda>>, 'qualify': <function Generator.<lambda>>}
Inherited Members
sqlglot.generator.Generator
TRANSFORMS
NULL_ORDERING_SUPPORTED
IGNORE_NULLS_IN_FUNC
LOCKING_READS_SUPPORTED
EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
WRAP_DERIVED_VALUES
CREATE_FUNCTION_RETURN_AS
MATCHED_BY_SOURCE
SINGLE_STRING_INTERVAL
INTERVAL_ALLOWS_PLURAL_FORM
LIMIT_FETCH
LIMIT_ONLY_LITERALS
RENAME_TABLE_WITH_DB
GROUPINGS_SEP
INDEX_ON
JOIN_HINTS
TABLE_HINTS
QUERY_HINTS
QUERY_HINT_SEP
IS_BOOL_ALLOWED
DUPLICATE_KEY_UPDATE_WITH_SET
LIMIT_IS_TOP
RETURNING_END
EXTRACT_ALLOWS_QUOTES
TZ_TO_WITH_TIME_ZONE
NVL2_SUPPORTED
VALUES_AS_TABLE
ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
UNNEST_WITH_ORDINALITY
AGGREGATE_FILTER_SUPPORTED
SEMI_ANTI_JOIN_WITH_SIDE
COMPUTED_COLUMN_WITH_TYPE
SUPPORTS_TABLE_COPY
TABLESAMPLE_REQUIRES_PARENS
TABLESAMPLE_SIZE_IS_ROWS
TABLESAMPLE_KEYWORDS
TABLESAMPLE_WITH_METHOD
TABLESAMPLE_SEED_KEYWORD
COLLATE_IS_FUNC
DATA_TYPE_SPECIFIERS_ALLOWED
ENSURE_BOOLS
CTE_RECURSIVE_KEYWORD_REQUIRED
SUPPORTS_SINGLE_ARG_CONCAT
LAST_DAY_SUPPORTS_DATE_PART
SUPPORTS_TABLE_ALIAS_COLUMNS
UNPIVOT_ALIASES_ARE_IDENTIFIERS
JSON_KEY_VALUE_PAIR_SEP
INSERT_OVERWRITE
SUPPORTS_SELECT_INTO
SUPPORTS_UNLOGGED_TABLES
SUPPORTS_CREATE_TABLE_LIKE
LIKE_PROPERTY_INSIDE_SCHEMA
MULTI_ARG_DISTINCT
JSON_TYPE_REQUIRED_FOR_EXTRACTION
JSON_PATH_BRACKETED_KEY_SUPPORTED
JSON_PATH_SINGLE_QUOTE_ESCAPE
SUPPORTED_JSON_PATH_PARTS
CAN_IMPLEMENT_ARRAY_ANY
SUPPORTS_TO_NUMBER
SUPPORTS_WINDOW_EXCLUDE
SET_OP_MODIFIERS
COPY_PARAMS_ARE_WRAPPED
COPY_PARAMS_EQ_REQUIRED
COPY_HAS_INTO_KEYWORD
TRY_SUPPORTED
SUPPORTS_UESCAPE
STAR_EXCEPT
HEX_FUNC
WITH_PROPERTIES_PREFIX
QUOTE_JSON_PATH
PAD_FILL_PATTERN_IS_REQUIRED
SUPPORTS_EXPLODING_PROJECTIONS
ARRAY_CONCAT_IS_VAR_LEN
SUPPORTS_CONVERT_TIMEZONE
SUPPORTS_MEDIAN
SUPPORTS_UNIX_SECONDS
ALTER_SET_WRAPPED
NORMALIZE_EXTRACT_DATE_PARTS
PARSE_JSON_NAME
ARRAY_SIZE_NAME
ALTER_SET_TYPE
ARRAY_SIZE_DIM_REQUIRED
SUPPORTS_BETWEEN_FLAGS
TYPE_MAPPING
TIME_PART_SINGULARS
TOKEN_MAPPING
STRUCT_DELIMITER
PARAMETER_TOKEN
NAMED_PLACEHOLDER_TOKEN
EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
PROPERTIES_LOCATION
RESERVED_KEYWORDS
WITH_SEPARATED_COMMENTS
EXCLUDE_COMMENTS
UNWRAPPED_INTERVAL_VALUES
PARAMETERIZABLE_TEXT_TYPES
EXPRESSIONS_WITHOUT_NESTED_CTES
RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
SENTINEL_LINE_BREAK
pretty
identify
normalize
pad
unsupported_level
max_unsupported
leading_comma
max_text_width
comments
dialect
normalize_functions
unsupported_messages
preprocess
unsupported
sep
seg
sanitize_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_parts
column_sql
columnposition_sql
columndef_sql
columnconstraint_sql
computedcolumnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
generatedasidentitycolumnconstraint_sql
generatedasrowcolumnconstraint_sql
periodforsystemtimeconstraint_sql
notnullcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
createable_sql
create_sql
sequenceproperties_sql
clone_sql
describe_sql
heredoc_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
hexstring_sql
bytestring_sql
unicodestring_sql
rawstring_sql
datatypeparam_sql
datatype_sql
directory_sql
delete_sql
drop_sql
set_operation
set_operations
fetch_sql
limitoptions_sql
filter_sql
hint_sql
indexparameters_sql
index_sql
identifier_sql
hex_sql
lowerhex_sql
inputoutputformat_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_name
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
partitionboundspec_sql
partitionedofproperty_sql
lockingproperty_sql
withdataproperty_sql
withsystemversioningproperty_sql
insert_sql
introducer_sql
kill_sql
pseudotype_sql
objectidentifier_sql
onconflict_sql
returning_sql
rowformatdelimitedproperty_sql
withtablehint_sql
indextablehint_sql
historicaldata_sql
table_parts
table_sql
tablefromrows_sql
tablesample_sql
pivot_sql
version_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
groupingsets_sql
rollup_sql
cube_sql
group_sql
having_sql
connect_sql
prior_sql
join_sql
lambda_sql
lateral_op
lateral_sql
limit_sql
offset_sql
setitem_sql
set_sql
pragma_sql
lock_sql
literal_sql
escape_str
loaddata_sql
null_sql
boolean_sql
order_sql
withfill_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognizemeasure_sql
matchrecognize_sql
query_modifiers
options_modifier
for_modifiers
queryoption_sql
offset_limit_modifiers
after_limit_modifiers
select_sql
schema_sql
schema_columns_sql
star_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
unnest_sql
prewhere_sql
where_sql
window_sql
partition_by_sql
windowspec_sql
withingroup_sql
between_sql
bracket_offset_expressions
bracket_sql
all_sql
any_sql
exists_sql
case_sql
constraint_sql
nextvaluefor_sql
extract_sql
trim_sql
convert_concat_args
concat_sql
concatws_sql
check_sql
foreignkey_sql
primarykey_sql
if_sql
matchagainst_sql
jsonkeyvalue_sql
jsonpath_sql
json_path_part
formatjson_sql
formatphrase_sql
jsonobject_sql
jsonobjectagg_sql
jsonarray_sql
jsonarrayagg_sql
jsoncolumndef_sql
jsonschema_sql
jsontable_sql
openjsoncolumndef_sql
openjson_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
pivotalias_sql
aliases_sql
atindex_sql
attimezone_sql
fromtimezone_sql
add_sql
and_sql
or_sql
xor_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
collate_sql
command_sql
comment_sql
mergetreettlaction_sql
mergetreettl_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
alterindex_sql
alterdiststyle_sql
altersortkey_sql
alterrename_sql
renamecolumn_sql
alterset_sql
alter_sql
add_column_sql
droppartition_sql
addconstraint_sql
addpartition_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
havingmax_sql
intdiv_sql
dpipe_sql
div_sql
safedivide_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
propertyeq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
ilikeany_sql
is_sql
like_sql
likeany_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
slice_sql
sub_sql
trycast_sql
jsoncast_sql
try_sql
log_sql
use_sql
binary
ceil_floor
function_fallback_sql
func
format_args
too_wide
format_time
expressions
op_expressions
naked_property
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
whens_sql
merge_sql
tochar_sql
tonumber_sql
dictproperty_sql
dictrange_sql
dictsubproperty_sql
duplicatekeyproperty_sql
uniquekeyproperty_sql
distributedbyproperty_sql
oncluster_sql
clusteredbyproperty_sql
anyvalue_sql
querytransform_sql
indexconstraintoption_sql
checkcolumnconstraint_sql
indexcolumnconstraint_sql
nvl2_sql
comprehension_sql
columnprefix_sql
opclass_sql
predict_sql
forin_sql
refresh_sql
toarray_sql
tsordstotime_sql
tsordstotimestamp_sql
tsordstodatetime_sql
tsordstodate_sql
unixdate_sql
lastday_sql
dateadd_sql
arrayany_sql
struct_sql
partitionrange_sql
truncatetable_sql
convert_sql
copyparameter_sql
credentials_sql
copy_sql
semicolon_sql
datadeletionproperty_sql
maskingpolicycolumnconstraint_sql
gapfill_sql
scope_resolution
scoperesolution_sql
parsejson_sql
rand_sql
changes_sql
pad_sql
summarize_sql
explodinggenerateseries_sql
arrayconcat_sql
converttimezone_sql
json_sql
jsonvalue_sql
conditionalinsert_sql
multitableinserts_sql
oncondition_sql
jsonextractquote_sql
jsonexists_sql
arrayagg_sql
apply_sql
grant_sql
grantprivilege_sql
grantprincipal_sql
columns_sql
overlay_sql
todouble_sql
string_sql
median_sql
overflowtruncatebehavior_sql
unixseconds_sql
arraysize_sql
attach_sql
detach_sql
attachoption_sql
featuresattime_sql
watermarkcolumnconstraint_sql
encodeproperty_sql
includeproperty_sql
xmlelement_sql
xmlkeyvalueoption_sql
partitionbyrangeproperty_sql
partitionbyrangepropertydynamic_sql
unpivotcolumns_sql
analyzesample_sql
analyzestatistics_sql
analyzehistogram_sql
analyzedelete_sql
analyzelistchainedrows_sql
analyzevalidate_sql
analyze_sql
xmltable_sql
xmlnamespace_sql
export_sql
declare_sql
declareitem_sql
recursivewithsearch_sql
parameterizedagg_sql
anonymousaggfunc_sql
combinedaggfunc_sql
combinedparameterizedagg_sql
show_sql
get_put_sql
translatecharacters_sql
decodecase_sql
semanticview_sql