from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
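
# A sketch of the shape this helper produces: for alternating key/value args
# such as [k1, v1, k2, v2] (e.g. from a ClickHouse-style map(k1, v1, k2, v2)
# call), it builds exp.VarMap(keys=exp.Array(k1, k2), values=exp.Array(v1, v2));
# a single star argument instead yields exp.StarMap.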


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
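
# A factory used in RANGE_PARSERS below: each returned callable parses the
# right-hand side of a binary predicate (e.g. LIKE, GLOB, RLIKE) and then lets
# _parse_escape consume an optional trailing ESCAPE clause.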


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
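
# The metaclass precomputes tries over the (possibly multi-word) keys of
# SHOW_PARSERS and SET_PARSERS once per class, so dialect subclasses get fast
# longest-prefix matching (via in_trie) without rebuilding per instance.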


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
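
    # For example, in a Postgres-flavored expression like col -> 'a' ->> 'b',
    # the ARROW entry builds exp.JSONExtract and DARROW builds
    # exp.JSONExtractScalar, while DCOLON handles casts such as col::int.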

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }
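
    # E.g. ":1" or ":name" (a COLON followed by a number or identifier token)
    # becomes exp.Placeholder carrying that text, while a bare "?" becomes an
    # anonymous exp.Placeholder.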

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
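
    # Each entry yields an (arg_name, parsed_expression) pair; the query
    # modifier machinery defined later in this class sets that arg on the
    # enclosing query node, e.g. ("limit", exp.Limit) after a LIMIT clause.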

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }
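
    # E.g. a literal such as JSON '{"a": 1}' is wrapped as exp.ParseJSON rather
    # than a plain cast, since several dialects give JSON literals dedicated
    # semantics.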

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether the behavior of a / b depends on the types of a and b.
    # False means a / b is always float division.
    # True means a / b is integer division if both a and b are integers.
    TYPED_DIVISION = False

    # False means 1 / 0 throws an error.
    # True means 1 / 0 returns null.
    SAFE_DIVISION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
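
    # A minimal usage sketch:
    #
    #   sql = "SELECT 1; SELECT 2"
    #   parser = Parser()
    #   tokens = parser._tokenizer.tokenize(sql)
    #   trees = parser.parse(tokens, sql)  # one tree per statement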

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
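
    # E.g. parse_into(exp.Condition, tokens) dispatches to the registered
    # EXPRESSION_PARSERS entry for exp.Condition, and a merged ParseError is
    # raised only if none of the candidate types parses cleanly.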

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)
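
    # _advance/_retreat implement the parser's backtracking: a speculative
    # parse records self._index up front and calls _retreat(index) to rewind
    # when a grammar branch does not pan out (see _parse_property below for
    # the idiom).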

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
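
    # E.g. "DROP TABLE IF EXISTS t" reaches _parse_exists after DROP TABLE and
    # returns True; with not_=True it instead matches CREATE's "IF NOT EXISTS".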
1295
1296 def _parse_create(self) -> exp.Create | exp.Command:
1297 # Note: this can't be None because we've matched a statement parser
1298 start = self._prev
1299 comments = self._prev_comments
1300
1301 replace = start.text.upper() == "REPLACE" or self._match_pair(
1302 TokenType.OR, TokenType.REPLACE
1303 )
1304 unique = self._match(TokenType.UNIQUE)
1305
1306 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1307 self._advance()
1308
1309 properties = None
1310 create_token = self._match_set(self.CREATABLES) and self._prev
1311
1312 if not create_token:
1313 # exp.Properties.Location.POST_CREATE
1314 properties = self._parse_properties()
1315 create_token = self._match_set(self.CREATABLES) and self._prev
1316
1317 if not properties or not create_token:
1318 return self._parse_as_command(start)
1319
1320 exists = self._parse_exists(not_=True)
1321 this = None
1322 expression: t.Optional[exp.Expression] = None
1323 indexes = None
1324 no_schema_binding = None
1325 begin = None
1326 end = None
1327 clone = None
1328
1329 def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
1330 nonlocal properties
1331 if properties and temp_props:
1332 properties.expressions.extend(temp_props.expressions)
1333 elif temp_props:
1334 properties = temp_props
1335
1336 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1337 this = self._parse_user_defined_function(kind=create_token.token_type)
1338
1339 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
1340 extend_props(self._parse_properties())
1341
1342 self._match(TokenType.ALIAS)
1343
1344 if self._match(TokenType.COMMAND):
1345 expression = self._parse_as_command(self._prev)
1346 else:
1347 begin = self._match(TokenType.BEGIN)
1348 return_ = self._match_text_seq("RETURN")
1349
1350 if self._match(TokenType.STRING, advance=False):
1351 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
1352 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
1353 expression = self._parse_string()
1354 extend_props(self._parse_properties())
1355 else:
1356 expression = self._parse_statement()
1357
1358 end = self._match_text_seq("END")
1359
1360 if return_:
1361 expression = self.expression(exp.Return, this=expression)
1362 elif create_token.token_type == TokenType.INDEX:
1363 this = self._parse_index(index=self._parse_id_var())
1364 elif create_token.token_type in self.DB_CREATABLES:
1365 table_parts = self._parse_table_parts(schema=True)
1366
1367 # exp.Properties.Location.POST_NAME
1368 self._match(TokenType.COMMA)
1369 extend_props(self._parse_properties(before=True))
1370
1371 this = self._parse_schema(this=table_parts)
1372
1373 # exp.Properties.Location.POST_SCHEMA and POST_WITH
1374 extend_props(self._parse_properties())
1375
1376 self._match(TokenType.ALIAS)
1377 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
1378 # exp.Properties.Location.POST_ALIAS
1379 extend_props(self._parse_properties())
1380
1381 expression = self._parse_ddl_select()
1382
1383 if create_token.token_type == TokenType.TABLE:
1384 # exp.Properties.Location.POST_EXPRESSION
1385 extend_props(self._parse_properties())
1386
1387 indexes = []
1388 while True:
1389 index = self._parse_index()
1390
1391 # exp.Properties.Location.POST_INDEX
1392 extend_props(self._parse_properties())
1393
1394 if not index:
1395 break
1396 else:
1397 self._match(TokenType.COMMA)
1398 indexes.append(index)
1399 elif create_token.token_type == TokenType.VIEW:
1400 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1401 no_schema_binding = True
1402
1403 shallow = self._match_text_seq("SHALLOW")
1404
1405 if self._match_texts(self.CLONE_KEYWORDS):
1406 copy = self._prev.text.lower() == "copy"
1407 clone = self._parse_table(schema=True)
1408 when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper()
1409 clone_kind = (
1410 self._match(TokenType.L_PAREN)
1411 and self._match_texts(self.CLONE_KINDS)
1412 and self._prev.text.upper()
1413 )
1414 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1415 self._match(TokenType.R_PAREN)
1416 clone = self.expression(
1417 exp.Clone,
1418 this=clone,
1419 when=when,
1420 kind=clone_kind,
1421 shallow=shallow,
1422 expression=clone_expression,
1423 copy=copy,
1424 )
1425
1426 return self.expression(
1427 exp.Create,
1428 comments=comments,
1429 this=this,
1430 kind=create_token.text,
1431 replace=replace,
1432 unique=unique,
1433 expression=expression,
1434 exists=exists,
1435 properties=properties,
1436 indexes=indexes,
1437 no_schema_binding=no_schema_binding,
1438 begin=begin,
1439 end=end,
1440 clone=clone,
1441 )
1442
1443 def _parse_property_before(self) -> t.Optional[exp.Expression]:
1444 # only used for teradata currently
1445 self._match(TokenType.COMMA)
1446
1447 kwargs = {
1448 "no": self._match_text_seq("NO"),
1449 "dual": self._match_text_seq("DUAL"),
1450 "before": self._match_text_seq("BEFORE"),
1451 "default": self._match_text_seq("DEFAULT"),
1452 "local": (self._match_text_seq("LOCAL") and "LOCAL")
1453 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
1454 "after": self._match_text_seq("AFTER"),
1455 "minimum": self._match_texts(("MIN", "MINIMUM")),
1456 "maximum": self._match_texts(("MAX", "MAXIMUM")),
1457 }
1458
1459 if self._match_texts(self.PROPERTY_PARSERS):
1460 parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
1461 try:
1462 return parser(self, **{k: v for k, v in kwargs.items() if v})
1463 except TypeError:
1464 self.raise_error(f"Cannot parse property '{self._prev.text}'")
1465
1466 return None
1467
1468 def _parse_property(self) -> t.Optional[exp.Expression]:
1469 if self._match_texts(self.PROPERTY_PARSERS):
1470 return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1471
1472 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
1473 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)
1474
1475 if self._match_text_seq("COMPOUND", "SORTKEY"):
1476 return self._parse_sortkey(compound=True)
1477
1478 if self._match_text_seq("SQL", "SECURITY"):
1479 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1480
1481 index = self._index
1482 key = self._parse_column()
1483
1484 if not self._match(TokenType.EQ):
1485 self._retreat(index)
1486 return None
1487
1488 return self.expression(
1489 exp.Property,
1490 this=key.to_dot() if isinstance(key, exp.Column) else key,
1491 value=self._parse_column() or self._parse_var(any_token=True),
1492 )
1493
1494 def _parse_stored(self) -> exp.FileFormatProperty:
1495 self._match(TokenType.ALIAS)
1496
1497 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1498 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1499
1500 return self.expression(
1501 exp.FileFormatProperty,
1502 this=self.expression(
1503 exp.InputOutputFormat, input_format=input_format, output_format=output_format
1504 )
1505 if input_format or output_format
1506 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1507 )
1508
1509 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
1510 self._match(TokenType.EQ)
1511 self._match(TokenType.ALIAS)
1512 return self.expression(exp_class, this=self._parse_field(), **kwargs)
1513
1514 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
1515 properties = []
1516 while True:
1517 if before:
1518 prop = self._parse_property_before()
1519 else:
1520 prop = self._parse_property()
1521
1522 if not prop:
1523 break
1524 for p in ensure_list(prop):
1525 properties.append(p)
1526
1527 if properties:
1528 return self.expression(exp.Properties, expressions=properties)
1529
1530 return None
1531
1532 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
1533 return self.expression(
1534 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1535 )
1536
1537 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
1538 if self._index >= 2:
1539 pre_volatile_token = self._tokens[self._index - 2]
1540 else:
1541 pre_volatile_token = None
1542
1543 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
1544 return exp.VolatileProperty()
1545
1546 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1547
1548 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
1549 self._match_pair(TokenType.EQ, TokenType.ON)
1550
1551 prop = self.expression(exp.WithSystemVersioningProperty)
1552 if self._match(TokenType.L_PAREN):
1553 self._match_text_seq("HISTORY_TABLE", "=")
1554 prop.set("this", self._parse_table_parts())
1555
1556 if self._match(TokenType.COMMA):
1557 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
1558 prop.set("expression", self._advance_any() and self._prev.text.upper())
1559
1560 self._match_r_paren()
1561
1562 return prop
1563
1564 def _parse_with_property(
1565 self,
1566 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
1567 if self._match(TokenType.L_PAREN, advance=False):
1568 return self._parse_wrapped_csv(self._parse_property)
1569
1570 if self._match_text_seq("JOURNAL"):
1571 return self._parse_withjournaltable()
1572
1573 if self._match_text_seq("DATA"):
1574 return self._parse_withdata(no=False)
1575 elif self._match_text_seq("NO", "DATA"):
1576 return self._parse_withdata(no=True)
1577
1578 if not self._next:
1579 return None
1580
1581 return self._parse_withisolatedloading()
1582
1583 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1584 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
1585 self._match(TokenType.EQ)
1586
1587 user = self._parse_id_var()
1588 self._match(TokenType.PARAMETER)
1589 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1590
1591 if not user or not host:
1592 return None
1593
1594 return exp.DefinerProperty(this=f"{user}@{host}")
1595
1596 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
1597 self._match(TokenType.TABLE)
1598 self._match(TokenType.EQ)
1599 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1600
1601 def _parse_log(self, no: bool = False) -> exp.LogProperty:
1602 return self.expression(exp.LogProperty, no=no)
1603
1604 def _parse_journal(self, **kwargs) -> exp.JournalProperty:
1605 return self.expression(exp.JournalProperty, **kwargs)
1606
1607 def _parse_checksum(self) -> exp.ChecksumProperty:
1608 self._match(TokenType.EQ)
1609
1610 on = None
1611 if self._match(TokenType.ON):
1612 on = True
1613 elif self._match_text_seq("OFF"):
1614 on = False
1615
1616 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
1617
1618 def _parse_cluster(self) -> exp.Cluster:
1619 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))
1620
1621 def _parse_clustered_by(self) -> exp.ClusteredByProperty:
1622 self._match_text_seq("BY")
1623
1624 self._match_l_paren()
1625 expressions = self._parse_csv(self._parse_column)
1626 self._match_r_paren()
1627
1628 if self._match_text_seq("SORTED", "BY"):
1629 self._match_l_paren()
1630 sorted_by = self._parse_csv(self._parse_ordered)
1631 self._match_r_paren()
1632 else:
1633 sorted_by = None
1634
1635 self._match(TokenType.INTO)
1636 buckets = self._parse_number()
1637 self._match_text_seq("BUCKETS")
1638
1639 return self.expression(
1640 exp.ClusteredByProperty,
1641 expressions=expressions,
1642 sorted_by=sorted_by,
1643 buckets=buckets,
1644 )
1645
1646 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
1647 if not self._match_text_seq("GRANTS"):
1648 self._retreat(self._index - 1)
1649 return None
1650
1651 return self.expression(exp.CopyGrantsProperty)
1652
1653 def _parse_freespace(self) -> exp.FreespaceProperty:
1654 self._match(TokenType.EQ)
1655 return self.expression(
1656 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1657 )
1658
1659 def _parse_mergeblockratio(
1660 self, no: bool = False, default: bool = False
1661 ) -> exp.MergeBlockRatioProperty:
1662 if self._match(TokenType.EQ):
1663 return self.expression(
1664 exp.MergeBlockRatioProperty,
1665 this=self._parse_number(),
1666 percent=self._match(TokenType.PERCENT),
1667 )
1668
1669 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
1670
1671 def _parse_datablocksize(
1672 self,
1673 default: t.Optional[bool] = None,
1674 minimum: t.Optional[bool] = None,
1675 maximum: t.Optional[bool] = None,
1676 ) -> exp.DataBlocksizeProperty:
1677 self._match(TokenType.EQ)
1678 size = self._parse_number()
1679
1680 units = None
1681 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1682 units = self._prev.text
1683
1684 return self.expression(
1685 exp.DataBlocksizeProperty,
1686 size=size,
1687 units=units,
1688 default=default,
1689 minimum=minimum,
1690 maximum=maximum,
1691 )
1692
1693 def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
1694 self._match(TokenType.EQ)
1695 always = self._match_text_seq("ALWAYS")
1696 manual = self._match_text_seq("MANUAL")
1697 never = self._match_text_seq("NEVER")
1698 default = self._match_text_seq("DEFAULT")
1699
1700 autotemp = None
1701 if self._match_text_seq("AUTOTEMP"):
1702 autotemp = self._parse_schema()
1703
1704 return self.expression(
1705 exp.BlockCompressionProperty,
1706 always=always,
1707 manual=manual,
1708 never=never,
1709 default=default,
1710 autotemp=autotemp,
1711 )
1712
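    # Teradata, e.g. WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]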
1713 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
1714 no = self._match_text_seq("NO")
1715 concurrent = self._match_text_seq("CONCURRENT")
1716 self._match_text_seq("ISOLATED", "LOADING")
1717 for_all = self._match_text_seq("FOR", "ALL")
1718 for_insert = self._match_text_seq("FOR", "INSERT")
1719 for_none = self._match_text_seq("FOR", "NONE")
1720 return self.expression(
1721 exp.IsolatedLoadingProperty,
1722 no=no,
1723 concurrent=concurrent,
1724 for_all=for_all,
1725 for_insert=for_insert,
1726 for_none=for_none,
1727 )
1728
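    # Teradata locking modifier, e.g. LOCKING ROW FOR ACCESS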
1729 def _parse_locking(self) -> exp.LockingProperty:
1730 if self._match(TokenType.TABLE):
1731 kind = "TABLE"
1732 elif self._match(TokenType.VIEW):
1733 kind = "VIEW"
1734 elif self._match(TokenType.ROW):
1735 kind = "ROW"
1736 elif self._match_text_seq("DATABASE"):
1737 kind = "DATABASE"
1738 else:
1739 kind = None
1740
1741 if kind in ("DATABASE", "TABLE", "VIEW"):
1742 this = self._parse_table_parts()
1743 else:
1744 this = None
1745
1746 if self._match(TokenType.FOR):
1747 for_or_in = "FOR"
1748 elif self._match(TokenType.IN):
1749 for_or_in = "IN"
1750 else:
1751 for_or_in = None
1752
1753 if self._match_text_seq("ACCESS"):
1754 lock_type = "ACCESS"
1755 elif self._match_texts(("EXCL", "EXCLUSIVE")):
1756 lock_type = "EXCLUSIVE"
1757 elif self._match_text_seq("SHARE"):
1758 lock_type = "SHARE"
1759 elif self._match_text_seq("READ"):
1760 lock_type = "READ"
1761 elif self._match_text_seq("WRITE"):
1762 lock_type = "WRITE"
1763 elif self._match_text_seq("CHECKSUM"):
1764 lock_type = "CHECKSUM"
1765 else:
1766 lock_type = None
1767
1768 override = self._match_text_seq("OVERRIDE")
1769
1770 return self.expression(
1771 exp.LockingProperty,
1772 this=this,
1773 kind=kind,
1774 for_or_in=for_or_in,
1775 lock_type=lock_type,
1776 override=override,
1777 )
1778
1779 def _parse_partition_by(self) -> t.List[exp.Expression]:
1780 if self._match(TokenType.PARTITION_BY):
1781 return self._parse_csv(self._parse_conjunction)
1782 return []
1783
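    # Postgres partition bounds, e.g. IN (...), FROM (...) TO (...), or
    # WITH (MODULUS m, REMAINDER r); the FOR VALUES prefix is consumed by
    # _parse_partitioned_of below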
1784 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
1785 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
1786 if self._match_text_seq("MINVALUE"):
1787 return exp.var("MINVALUE")
1788 if self._match_text_seq("MAXVALUE"):
1789 return exp.var("MAXVALUE")
1790 return self._parse_bitwise()
1791
1792 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
1793 expression = None
1794 from_expressions = None
1795 to_expressions = None
1796
1797 if self._match(TokenType.IN):
1798 this = self._parse_wrapped_csv(self._parse_bitwise)
1799 elif self._match(TokenType.FROM):
1800 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
1801 self._match_text_seq("TO")
1802 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
1803 elif self._match_text_seq("WITH", "(", "MODULUS"):
1804 this = self._parse_number()
1805 self._match_text_seq(",", "REMAINDER")
1806 expression = self._parse_number()
1807 self._match_r_paren()
1808 else:
1809 self.raise_error("Failed to parse partition bound spec.")
1810
1811 return self.expression(
1812 exp.PartitionBoundSpec,
1813 this=this,
1814 expression=expression,
1815 from_expressions=from_expressions,
1816 to_expressions=to_expressions,
1817 )
1818
1819 # https://www.postgresql.org/docs/current/sql-createtable.html
1820 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
1821 if not self._match_text_seq("OF"):
1822 self._retreat(self._index - 1)
1823 return None
1824
1825 this = self._parse_table(schema=True)
1826
1827 if self._match(TokenType.DEFAULT):
1828 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
1829 elif self._match_text_seq("FOR", "VALUES"):
1830 expression = self._parse_partition_bound_spec()
1831 else:
1832 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")
1833
1834 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
1835
1836 def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
1837 self._match(TokenType.EQ)
1838 return self.expression(
1839 exp.PartitionedByProperty,
1840 this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1841 )
1842
1843 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
1844 if self._match_text_seq("AND", "STATISTICS"):
1845 statistics = True
1846 elif self._match_text_seq("AND", "NO", "STATISTICS"):
1847 statistics = False
1848 else:
1849 statistics = None
1850
1851 return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1852
1853 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
1854 if self._match_text_seq("PRIMARY", "INDEX"):
1855 return exp.NoPrimaryIndexProperty()
1856 return None
1857
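    # e.g. ON COMMIT PRESERVE ROWS / ON COMMIT DELETE ROWS for temporary tables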
1858 def _parse_on_property(self) -> t.Optional[exp.Expression]:
1859 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1860 return exp.OnCommitProperty()
1861 if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1862 return exp.OnCommitProperty(delete=True)
1863 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
1864
1865 def _parse_distkey(self) -> exp.DistKeyProperty:
1866 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1867
1868 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
1869 table = self._parse_table(schema=True)
1870
1871 options = []
1872 while self._match_texts(("INCLUDING", "EXCLUDING")):
1873 this = self._prev.text.upper()
1874
1875 id_var = self._parse_id_var()
1876 if not id_var:
1877 return None
1878
1879 options.append(
1880 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
1881 )
1882
1883 return self.expression(exp.LikeProperty, this=table, expressions=options)
1884
1885 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
1886 return self.expression(
1887 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
1888 )
1889
1890 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
1891 self._match(TokenType.EQ)
1892 return self.expression(
1893 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1894 )
1895
1896 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
1897 self._match_text_seq("WITH", "CONNECTION")
1898 return self.expression(
1899 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
1900 )
1901
1902 def _parse_returns(self) -> exp.ReturnsProperty:
1903 value: t.Optional[exp.Expression]
1904 is_table = self._match(TokenType.TABLE)
1905
1906 if is_table:
1907 if self._match(TokenType.LT):
1908 value = self.expression(
1909 exp.Schema,
1910 this="TABLE",
1911 expressions=self._parse_csv(self._parse_struct_types),
1912 )
1913 if not self._match(TokenType.GT):
1914 self.raise_error("Expecting >")
1915 else:
1916 value = self._parse_schema(exp.var("TABLE"))
1917 else:
1918 value = self._parse_types()
1919
1920 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1921
1922 def _parse_describe(self) -> exp.Describe:
1923 kind = self._match_set(self.CREATABLES) and self._prev.text
1924 this = self._parse_table(schema=True)
1925 properties = self._parse_properties()
1926 expressions = properties.expressions if properties else None
1927 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)
1928
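    # Handles both INSERT [OR alternative] INTO ... SELECT/VALUES ... and Hive-style
    # INSERT OVERWRITE [LOCAL] DIRECTORY 'path' [ROW FORMAT ...] statements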
1929 def _parse_insert(self) -> exp.Insert:
1930 comments = ensure_list(self._prev_comments)
1931 overwrite = self._match(TokenType.OVERWRITE)
1932 ignore = self._match(TokenType.IGNORE)
1933 local = self._match_text_seq("LOCAL")
1934 alternative = None
1935
1936 if self._match_text_seq("DIRECTORY"):
1937 this: t.Optional[exp.Expression] = self.expression(
1938 exp.Directory,
1939 this=self._parse_var_or_string(),
1940 local=local,
1941 row_format=self._parse_row_format(match_row=True),
1942 )
1943 else:
1944 if self._match(TokenType.OR):
1945 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1946
1947 self._match(TokenType.INTO)
1948 comments += ensure_list(self._prev_comments)
1949 self._match(TokenType.TABLE)
1950 this = self._parse_table(schema=True)
1951
1952 returning = self._parse_returning()
1953
1954 return self.expression(
1955 exp.Insert,
1956 comments=comments,
1957 this=this,
1958 by_name=self._match_text_seq("BY", "NAME"),
1959 exists=self._parse_exists(),
1960 partition=self._parse_partition(),
1961 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
1962 and self._parse_conjunction(),
1963 expression=self._parse_ddl_select(),
1964 conflict=self._parse_on_conflict(),
1965 returning=returning or self._parse_returning(),
1966 overwrite=overwrite,
1967 alternative=alternative,
1968 ignore=ignore,
1969 )
1970
1971 def _parse_kill(self) -> exp.Kill:
1972 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None
1973
1974 return self.expression(
1975 exp.Kill,
1976 this=self._parse_primary(),
1977 kind=kind,
1978 )
1979
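    # Postgres: ON CONFLICT [(key, ...)] [ON CONSTRAINT name] DO NOTHING | DO UPDATE SET ...
    # MySQL: ON DUPLICATE KEY UPDATE col = expr, ...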
1980 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
1981 conflict = self._match_text_seq("ON", "CONFLICT")
1982 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
1983
1984 if not conflict and not duplicate:
1985 return None
1986
1987 nothing = None
1988 expressions = None
1989 key = None
1990 constraint = None
1991
1992 if conflict:
1993 if self._match_text_seq("ON", "CONSTRAINT"):
1994 constraint = self._parse_id_var()
1995 else:
1996 key = self._parse_csv(self._parse_value)
1997
1998 self._match_text_seq("DO")
1999 if self._match_text_seq("NOTHING"):
2000 nothing = True
2001 else:
2002 self._match(TokenType.UPDATE)
2003 self._match(TokenType.SET)
2004 expressions = self._parse_csv(self._parse_equality)
2005
2006 return self.expression(
2007 exp.OnConflict,
2008 duplicate=duplicate,
2009 expressions=expressions,
2010 nothing=nothing,
2011 key=key,
2012 constraint=constraint,
2013 )
2014
2015 def _parse_returning(self) -> t.Optional[exp.Returning]:
2016 if not self._match(TokenType.RETURNING):
2017 return None
2018 return self.expression(
2019 exp.Returning,
2020 expressions=self._parse_csv(self._parse_expression),
2021 into=self._match(TokenType.INTO) and self._parse_table_part(),
2022 )
2023
2024 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
2025 if not self._match(TokenType.FORMAT):
2026 return None
2027 return self._parse_row_format()
2028
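    # Hive row formats, e.g. ROW FORMAT SERDE 'a.b.Serde' [WITH SERDEPROPERTIES (...)]
    # or ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' [ESCAPED BY ...] ...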
2029 def _parse_row_format(
2030 self, match_row: bool = False
2031 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
2032 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
2033 return None
2034
2035 if self._match_text_seq("SERDE"):
2036 this = self._parse_string()
2037
2038 serde_properties = None
2039 if self._match(TokenType.SERDE_PROPERTIES):
2040 serde_properties = self.expression(
2041 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
2042 )
2043
2044 return self.expression(
2045 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
2046 )
2047
2048 self._match_text_seq("DELIMITED")
2049
2050 kwargs = {}
2051
2052 if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
2053 kwargs["fields"] = self._parse_string()
2054 if self._match_text_seq("ESCAPED", "BY"):
2055 kwargs["escaped"] = self._parse_string()
2056 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
2057 kwargs["collection_items"] = self._parse_string()
2058 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
2059 kwargs["map_keys"] = self._parse_string()
2060 if self._match_text_seq("LINES", "TERMINATED", "BY"):
2061 kwargs["lines"] = self._parse_string()
2062 if self._match_text_seq("NULL", "DEFINED", "AS"):
2063 kwargs["null"] = self._parse_string()
2064
2065 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore
2066
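    # Hive, e.g. LOAD DATA [LOCAL] INPATH 'path' [OVERWRITE] INTO TABLE t [PARTITION (...)]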
2067 def _parse_load(self) -> exp.LoadData | exp.Command:
2068 if self._match_text_seq("DATA"):
2069 local = self._match_text_seq("LOCAL")
2070 self._match_text_seq("INPATH")
2071 inpath = self._parse_string()
2072 overwrite = self._match(TokenType.OVERWRITE)
2073 self._match_pair(TokenType.INTO, TokenType.TABLE)
2074
2075 return self.expression(
2076 exp.LoadData,
2077 this=self._parse_table(schema=True),
2078 local=local,
2079 overwrite=overwrite,
2080 inpath=inpath,
2081 partition=self._parse_partition(),
2082 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
2083 serde=self._match_text_seq("SERDE") and self._parse_string(),
2084 )
2085 return self._parse_as_command(self._prev)
2086
2087 def _parse_delete(self) -> exp.Delete:
2088 # This handles MySQL's "Multiple-Table Syntax"
2089 # https://dev.mysql.com/doc/refman/8.0/en/delete.html
2090 tables = None
2091 comments = self._prev_comments
2092 if not self._match(TokenType.FROM, advance=False):
2093 tables = self._parse_csv(self._parse_table) or None
2094
2095 returning = self._parse_returning()
2096
2097 return self.expression(
2098 exp.Delete,
2099 comments=comments,
2100 tables=tables,
2101 this=self._match(TokenType.FROM) and self._parse_table(joins=True),
2102 using=self._match(TokenType.USING) and self._parse_table(joins=True),
2103 where=self._parse_where(),
2104 returning=returning or self._parse_returning(),
2105 limit=self._parse_limit(),
2106 )
2107
2108 def _parse_update(self) -> exp.Update:
2109 comments = self._prev_comments
2110 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
2111 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
2112 returning = self._parse_returning()
2113 return self.expression(
2114 exp.Update,
2115 comments=comments,
2116 **{ # type: ignore
2117 "this": this,
2118 "expressions": expressions,
2119 "from": self._parse_from(joins=True),
2120 "where": self._parse_where(),
2121 "returning": returning or self._parse_returning(),
2122 "order": self._parse_order(),
2123 "limit": self._parse_limit(),
2124 },
2125 )
2126
2127 def _parse_uncache(self) -> exp.Uncache:
2128 if not self._match(TokenType.TABLE):
2129 self.raise_error("Expecting TABLE after UNCACHE")
2130
2131 return self.expression(
2132 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
2133 )
2134
2135 def _parse_cache(self) -> exp.Cache:
2136 lazy = self._match_text_seq("LAZY")
2137 self._match(TokenType.TABLE)
2138 table = self._parse_table(schema=True)
2139
2140 options = []
2141 if self._match_text_seq("OPTIONS"):
2142 self._match_l_paren()
2143 k = self._parse_string()
2144 self._match(TokenType.EQ)
2145 v = self._parse_string()
2146 options = [k, v]
2147 self._match_r_paren()
2148
2149 self._match(TokenType.ALIAS)
2150 return self.expression(
2151 exp.Cache,
2152 this=table,
2153 lazy=lazy,
2154 options=options,
2155 expression=self._parse_select(nested=True),
2156 )
2157
2158 def _parse_partition(self) -> t.Optional[exp.Partition]:
2159 if not self._match(TokenType.PARTITION):
2160 return None
2161
2162 return self.expression(
2163 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
2164 )
2165
2166 def _parse_value(self) -> exp.Tuple:
2167 if self._match(TokenType.L_PAREN):
2168 expressions = self._parse_csv(self._parse_conjunction)
2169 self._match_r_paren()
2170 return self.expression(exp.Tuple, expressions=expressions)
2171
2172        # In Presto, VALUES 1, 2 produces a single column with two rows.
2173        # https://prestodb.io/docs/current/sql/values.html
2174 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
2175
2176 def _parse_projections(self) -> t.List[exp.Expression]:
2177 return self._parse_expressions()
2178
2179 def _parse_select(
2180 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
2181 ) -> t.Optional[exp.Expression]:
2182 cte = self._parse_with()
2183
2184 if cte:
2185 this = self._parse_statement()
2186
2187 if not this:
2188 self.raise_error("Failed to parse any statement following CTE")
2189 return cte
2190
2191 if "with" in this.arg_types:
2192 this.set("with", cte)
2193 else:
2194 self.raise_error(f"{this.key} does not support CTE")
2195 this = cte
2196
2197 return this
2198
2199        # DuckDB supports a leading FROM clause, e.g. FROM x [SELECT ...]
2200 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None
2201
2202 if self._match(TokenType.SELECT):
2203 comments = self._prev_comments
2204
2205 hint = self._parse_hint()
2206 all_ = self._match(TokenType.ALL)
2207 distinct = self._match_set(self.DISTINCT_TOKENS)
2208
2209 kind = (
2210 self._match(TokenType.ALIAS)
2211 and self._match_texts(("STRUCT", "VALUE"))
2212 and self._prev.text
2213 )
2214
2215 if distinct:
2216 distinct = self.expression(
2217 exp.Distinct,
2218 on=self._parse_value() if self._match(TokenType.ON) else None,
2219 )
2220
2221 if all_ and distinct:
2222 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
2223
2224 limit = self._parse_limit(top=True)
2225 projections = self._parse_projections()
2226
2227 this = self.expression(
2228 exp.Select,
2229 kind=kind,
2230 hint=hint,
2231 distinct=distinct,
2232 expressions=projections,
2233 limit=limit,
2234 )
2235 this.comments = comments
2236
2237 into = self._parse_into()
2238 if into:
2239 this.set("into", into)
2240
2241 if not from_:
2242 from_ = self._parse_from()
2243
2244 if from_:
2245 this.set("from", from_)
2246
2247 this = self._parse_query_modifiers(this)
2248 elif (table or nested) and self._match(TokenType.L_PAREN):
2249 if self._match(TokenType.PIVOT):
2250 this = self._parse_simplified_pivot()
2251 elif self._match(TokenType.FROM):
2252 this = exp.select("*").from_(
2253 t.cast(exp.From, self._parse_from(skip_from_token=True))
2254 )
2255 else:
2256 this = self._parse_table() if table else self._parse_select(nested=True)
2257 this = self._parse_set_operations(self._parse_query_modifiers(this))
2258
2259 self._match_r_paren()
2260
2261 # We return early here so that the UNION isn't attached to the subquery by the
2262 # following call to _parse_set_operations, but instead becomes the parent node
2263 return self._parse_subquery(this, parse_alias=parse_subquery_alias)
2264 elif self._match(TokenType.VALUES):
2265 this = self.expression(
2266 exp.Values,
2267 expressions=self._parse_csv(self._parse_value),
2268 alias=self._parse_table_alias(),
2269 )
2270 elif from_:
2271 this = exp.select("*").from_(from_.this, copy=False)
2272 else:
2273 this = None
2274
2275 return self._parse_set_operations(this)
2276
2277 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
2278 if not skip_with_token and not self._match(TokenType.WITH):
2279 return None
2280
2281 comments = self._prev_comments
2282 recursive = self._match(TokenType.RECURSIVE)
2283
2284 expressions = []
2285 while True:
2286 expressions.append(self._parse_cte())
2287
2288 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
2289 break
2290 else:
2291 self._match(TokenType.WITH)
2292
2293 return self.expression(
2294 exp.With, comments=comments, expressions=expressions, recursive=recursive
2295 )
2296
2297 def _parse_cte(self) -> exp.CTE:
2298 alias = self._parse_table_alias()
2299 if not alias or not alias.this:
2300 self.raise_error("Expected CTE to have alias")
2301
2302 self._match(TokenType.ALIAS)
2303 return self.expression(
2304 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
2305 )
2306
2307 def _parse_table_alias(
2308 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
2309 ) -> t.Optional[exp.TableAlias]:
2310 any_token = self._match(TokenType.ALIAS)
2311 alias = (
2312 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
2313 or self._parse_string_as_identifier()
2314 )
2315
2316 index = self._index
2317 if self._match(TokenType.L_PAREN):
2318 columns = self._parse_csv(self._parse_function_parameter)
2319 self._match_r_paren() if columns else self._retreat(index)
2320 else:
2321 columns = None
2322
2323 if not alias and not columns:
2324 return None
2325
2326 return self.expression(exp.TableAlias, this=alias, columns=columns)
2327
2328 def _parse_subquery(
2329 self, this: t.Optional[exp.Expression], parse_alias: bool = True
2330 ) -> t.Optional[exp.Subquery]:
2331 if not this:
2332 return None
2333
2334 return self.expression(
2335 exp.Subquery,
2336 this=this,
2337 pivots=self._parse_pivots(),
2338 alias=self._parse_table_alias() if parse_alias else None,
2339 )
2340
2341 def _parse_query_modifiers(
2342 self, this: t.Optional[exp.Expression]
2343 ) -> t.Optional[exp.Expression]:
2344 if isinstance(this, self.MODIFIABLES):
2345 for join in iter(self._parse_join, None):
2346 this.append("joins", join)
2347 for lateral in iter(self._parse_lateral, None):
2348 this.append("laterals", lateral)
2349
2350 while True:
2351 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
2352 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
2353 key, expression = parser(self)
2354
2355 if expression:
2356 this.set(key, expression)
2357 if key == "limit":
2358 offset = expression.args.pop("offset", None)
2359 if offset:
2360 this.set("offset", exp.Offset(expression=offset))
2361 continue
2362 break
2363 return this
2364
2365 def _parse_hint(self) -> t.Optional[exp.Hint]:
2366 if self._match(TokenType.HINT):
2367 hints = []
2368 for hint in iter(lambda: self._parse_csv(self._parse_function), []):
2369 hints.extend(hint)
2370
2371 if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2372 self.raise_error("Expected */ after HINT")
2373
2374 return self.expression(exp.Hint, expressions=hints)
2375
2376 return None
2377
2378 def _parse_into(self) -> t.Optional[exp.Into]:
2379 if not self._match(TokenType.INTO):
2380 return None
2381
2382 temp = self._match(TokenType.TEMPORARY)
2383 unlogged = self._match_text_seq("UNLOGGED")
2384 self._match(TokenType.TABLE)
2385
2386 return self.expression(
2387 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2388 )
2389
2390 def _parse_from(
2391 self, joins: bool = False, skip_from_token: bool = False
2392 ) -> t.Optional[exp.From]:
2393 if not skip_from_token and not self._match(TokenType.FROM):
2394 return None
2395
2396 return self.expression(
2397 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
2398 )
2399
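    # Shape of the clause being parsed, roughly:
    # MATCH_RECOGNIZE (
    #     [PARTITION BY ...] [ORDER BY ...] [MEASURES ...]
    #     [ONE ROW PER MATCH | ALL ROWS PER MATCH [SHOW EMPTY MATCHES | OMIT EMPTY MATCHES | WITH UNMATCHED ROWS]]
    #     [AFTER MATCH SKIP ...] [PATTERN (...)] [DEFINE ...]
    # ) [alias]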
2400 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
2401 if not self._match(TokenType.MATCH_RECOGNIZE):
2402 return None
2403
2404 self._match_l_paren()
2405
2406 partition = self._parse_partition_by()
2407 order = self._parse_order()
2408 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None
2409
2410 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2411 rows = exp.var("ONE ROW PER MATCH")
2412 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2413 text = "ALL ROWS PER MATCH"
2414 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2415                text += " SHOW EMPTY MATCHES"
2416            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2417                text += " OMIT EMPTY MATCHES"
2418            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2419                text += " WITH UNMATCHED ROWS"
2420 rows = exp.var(text)
2421 else:
2422 rows = None
2423
2424 if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2425 text = "AFTER MATCH SKIP"
2426 if self._match_text_seq("PAST", "LAST", "ROW"):
2427                text += " PAST LAST ROW"
2428            elif self._match_text_seq("TO", "NEXT", "ROW"):
2429                text += " TO NEXT ROW"
2430 elif self._match_text_seq("TO", "FIRST"):
2431 text += f" TO FIRST {self._advance_any().text}" # type: ignore
2432 elif self._match_text_seq("TO", "LAST"):
2433 text += f" TO LAST {self._advance_any().text}" # type: ignore
2434 after = exp.var(text)
2435 else:
2436 after = None
2437
2438 if self._match_text_seq("PATTERN"):
2439 self._match_l_paren()
2440
2441 if not self._curr:
2442 self.raise_error("Expecting )", self._curr)
2443
2444 paren = 1
2445 start = self._curr
2446
2447 while self._curr and paren > 0:
2448 if self._curr.token_type == TokenType.L_PAREN:
2449 paren += 1
2450 if self._curr.token_type == TokenType.R_PAREN:
2451 paren -= 1
2452
2453 end = self._prev
2454 self._advance()
2455
2456 if paren > 0:
2457 self.raise_error("Expecting )", self._curr)
2458
2459 pattern = exp.var(self._find_sql(start, end))
2460 else:
2461 pattern = None
2462
2463 define = (
2464 self._parse_csv(
2465 lambda: self.expression(
2466 exp.Alias,
2467 alias=self._parse_id_var(any_token=True),
2468 this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2469 )
2470 )
2471 if self._match_text_seq("DEFINE")
2472 else None
2473 )
2474
2475 self._match_r_paren()
2476
2477 return self.expression(
2478 exp.MatchRecognize,
2479 partition_by=partition,
2480 order=order,
2481 measures=measures,
2482 rows=rows,
2483 after=after,
2484 pattern=pattern,
2485 define=define,
2486 alias=self._parse_table_alias(),
2487 )
2488
2489 def _parse_lateral(self) -> t.Optional[exp.Lateral]:
2490 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
2491 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
2492
2493 if outer_apply or cross_apply:
2494 this = self._parse_select(table=True)
2495 view = None
2496 outer = not cross_apply
2497 elif self._match(TokenType.LATERAL):
2498 this = self._parse_select(table=True)
2499 view = self._match(TokenType.VIEW)
2500 outer = self._match(TokenType.OUTER)
2501 else:
2502 return None
2503
2504 if not this:
2505 this = (
2506 self._parse_unnest()
2507 or self._parse_function()
2508 or self._parse_id_var(any_token=False)
2509 )
2510
2511 while self._match(TokenType.DOT):
2512 this = exp.Dot(
2513 this=this,
2514 expression=self._parse_function() or self._parse_id_var(any_token=False),
2515 )
2516
2517 if view:
2518 table = self._parse_id_var(any_token=False)
2519 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2520 table_alias: t.Optional[exp.TableAlias] = self.expression(
2521 exp.TableAlias, this=table, columns=columns
2522 )
2523 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
2524 # We move the alias from the lateral's child node to the lateral itself
2525 table_alias = this.args["alias"].pop()
2526 else:
2527 table_alias = self._parse_table_alias()
2528
2529 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)
2530
2531 def _parse_join_parts(
2532 self,
2533 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2534 return (
2535 self._match_set(self.JOIN_METHODS) and self._prev,
2536 self._match_set(self.JOIN_SIDES) and self._prev,
2537 self._match_set(self.JOIN_KINDS) and self._prev,
2538 )
2539
2540 def _parse_join(
2541 self, skip_join_token: bool = False, parse_bracket: bool = False
2542 ) -> t.Optional[exp.Join]:
2543 if self._match(TokenType.COMMA):
2544 return self.expression(exp.Join, this=self._parse_table())
2545
2546 index = self._index
2547 method, side, kind = self._parse_join_parts()
2548 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
2549 join = self._match(TokenType.JOIN)
2550
2551 if not skip_join_token and not join:
2552 self._retreat(index)
2553 kind = None
2554 method = None
2555 side = None
2556
2557 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
2558 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)
2559
2560 if not skip_join_token and not join and not outer_apply and not cross_apply:
2561 return None
2562
2563 if outer_apply:
2564 side = Token(TokenType.LEFT, "LEFT")
2565
2566 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
2567
2568 if method:
2569 kwargs["method"] = method.text
2570 if side:
2571 kwargs["side"] = side.text
2572 if kind:
2573 kwargs["kind"] = kind.text
2574 if hint:
2575 kwargs["hint"] = hint
2576
2577 if self._match(TokenType.ON):
2578 kwargs["on"] = self._parse_conjunction()
2579 elif self._match(TokenType.USING):
2580 kwargs["using"] = self._parse_wrapped_id_vars()
2581 elif not (kind and kind.token_type == TokenType.CROSS):
2582 index = self._index
2583 join = self._parse_join()
2584
2585 if join and self._match(TokenType.ON):
2586 kwargs["on"] = self._parse_conjunction()
2587 elif join and self._match(TokenType.USING):
2588 kwargs["using"] = self._parse_wrapped_id_vars()
2589 else:
2590 join = None
2591 self._retreat(index)
2592
2593 kwargs["this"].set("joins", [join] if join else None)
2594
2595 comments = [c for token in (method, side, kind) if token for c in token.comments]
2596 return self.expression(exp.Join, comments=comments, **kwargs)
2597
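    # Postgres operator classes, e.g. CREATE INDEX ... (col text_pattern_ops)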
2598 def _parse_opclass(self) -> t.Optional[exp.Expression]:
2599 this = self._parse_conjunction()
2600 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
2601 return this
2602
2603 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
2604 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())
2605
2606 return this
2607
2608 def _parse_index(
2609 self,
2610 index: t.Optional[exp.Expression] = None,
2611 ) -> t.Optional[exp.Index]:
2612 if index:
2613 unique = None
2614 primary = None
2615 amp = None
2616
2617 self._match(TokenType.ON)
2618 self._match(TokenType.TABLE) # hive
2619 table = self._parse_table_parts(schema=True)
2620 else:
2621 unique = self._match(TokenType.UNIQUE)
2622 primary = self._match_text_seq("PRIMARY")
2623 amp = self._match_text_seq("AMP")
2624
2625 if not self._match(TokenType.INDEX):
2626 return None
2627
2628 index = self._parse_id_var()
2629 table = None
2630
2631 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
2632
2633 if self._match(TokenType.L_PAREN, advance=False):
2634 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
2635 else:
2636 columns = None
2637
2638 return self.expression(
2639 exp.Index,
2640 this=index,
2641 table=table,
2642 using=using,
2643 columns=columns,
2644 unique=unique,
2645 primary=primary,
2646 amp=amp,
2647 partition_by=self._parse_partition_by(),
2648 where=self._parse_where(),
2649 )
2650
2651 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
2652 hints: t.List[exp.Expression] = []
2653 if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
2654 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
2655 hints.append(
2656 self.expression(
2657 exp.WithTableHint,
2658 expressions=self._parse_csv(
2659 lambda: self._parse_function() or self._parse_var(any_token=True)
2660 ),
2661 )
2662 )
2663 self._match_r_paren()
2664 else:
2665 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
2666 while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
2667 hint = exp.IndexTableHint(this=self._prev.text.upper())
2668
2669 self._match_texts(("INDEX", "KEY"))
2670 if self._match(TokenType.FOR):
2671 hint.set("target", self._advance_any() and self._prev.text.upper())
2672
2673 hint.set("expressions", self._parse_wrapped_id_vars())
2674 hints.append(hint)
2675
2676 return hints or None
2677
2678 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2679 return (
2680 (not schema and self._parse_function(optional_parens=False))
2681 or self._parse_id_var(any_token=False)
2682 or self._parse_string_as_identifier()
2683 or self._parse_placeholder()
2684 )
2685
2686 def _parse_table_parts(self, schema: bool = False) -> exp.Table:
2687 catalog = None
2688 db = None
2689 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)
2690
2691 while self._match(TokenType.DOT):
2692 if catalog:
2693 # This allows nesting the table in arbitrarily many dot expressions if needed
2694 table = self.expression(
2695 exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2696 )
2697 else:
2698 catalog = db
2699 db = table
2700 table = self._parse_table_part(schema=schema) or ""
2701
2702 if not table:
2703 self.raise_error(f"Expected table name but got {self._curr}")
2704
2705 return self.expression(
2706 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2707 )
2708
2709 def _parse_table(
2710 self,
2711 schema: bool = False,
2712 joins: bool = False,
2713 alias_tokens: t.Optional[t.Collection[TokenType]] = None,
2714 parse_bracket: bool = False,
2715 ) -> t.Optional[exp.Expression]:
2716 lateral = self._parse_lateral()
2717 if lateral:
2718 return lateral
2719
2720 unnest = self._parse_unnest()
2721 if unnest:
2722 return unnest
2723
2724 values = self._parse_derived_table_values()
2725 if values:
2726 return values
2727
2728 subquery = self._parse_select(table=True)
2729 if subquery:
2730 if not subquery.args.get("pivots"):
2731 subquery.set("pivots", self._parse_pivots())
2732 return subquery
2733
2734 bracket = parse_bracket and self._parse_bracket(None)
2735 bracket = self.expression(exp.Table, this=bracket) if bracket else None
2736 this = t.cast(
2737 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
2738 )
2739
2740 if schema:
2741 return self._parse_schema(this=this)
2742
2743 version = self._parse_version()
2744
2745 if version:
2746 this.set("version", version)
2747
2748 if self.ALIAS_POST_TABLESAMPLE:
2749 table_sample = self._parse_table_sample()
2750
2751 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
2752 if alias:
2753 this.set("alias", alias)
2754
2755 if self._match_text_seq("AT"):
2756 this.set("index", self._parse_id_var())
2757
2758 this.set("hints", self._parse_table_hints())
2759
2760 if not this.args.get("pivots"):
2761 this.set("pivots", self._parse_pivots())
2762
2763 if not self.ALIAS_POST_TABLESAMPLE:
2764 table_sample = self._parse_table_sample()
2765
2766 if table_sample:
2767 table_sample.set("this", this)
2768 this = table_sample
2769
2770 if joins:
2771 for join in iter(self._parse_join, None):
2772 this.append("joins", join)
2773
2774 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
2775 this.set("ordinality", True)
2776 this.set("alias", self._parse_table_alias())
2777
2778 return this
2779
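    # Time travel clauses, e.g. SQL Server's FOR SYSTEM_TIME AS OF ...; this assumes
    # the leading keywords were tokenized as TIMESTAMP_SNAPSHOT / VERSION_SNAPSHOT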
2780 def _parse_version(self) -> t.Optional[exp.Version]:
2781 if self._match(TokenType.TIMESTAMP_SNAPSHOT):
2782 this = "TIMESTAMP"
2783 elif self._match(TokenType.VERSION_SNAPSHOT):
2784 this = "VERSION"
2785 else:
2786 return None
2787
2788 if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
2789 kind = self._prev.text.upper()
2790 start = self._parse_bitwise()
2791 self._match_texts(("TO", "AND"))
2792 end = self._parse_bitwise()
2793 expression: t.Optional[exp.Expression] = self.expression(
2794 exp.Tuple, expressions=[start, end]
2795 )
2796 elif self._match_text_seq("CONTAINED", "IN"):
2797 kind = "CONTAINED IN"
2798 expression = self.expression(
2799 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
2800 )
2801 elif self._match(TokenType.ALL):
2802 kind = "ALL"
2803 expression = None
2804 else:
2805 self._match_text_seq("AS", "OF")
2806 kind = "AS OF"
2807 expression = self._parse_type()
2808
2809 return self.expression(exp.Version, this=this, expression=expression, kind=kind)
2810
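    # e.g. UNNEST(expr) [WITH ORDINALITY] [AS alias(col, ...)] [WITH OFFSET [AS o]]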
2811 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
2812 if not self._match(TokenType.UNNEST):
2813 return None
2814
2815 expressions = self._parse_wrapped_csv(self._parse_equality)
2816 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2817
2818 alias = self._parse_table_alias() if with_alias else None
2819
2820 if alias:
2821 if self.UNNEST_COLUMN_ONLY:
2822 if alias.args.get("columns"):
2823 self.raise_error("Unexpected extra column alias in unnest.")
2824
2825 alias.set("columns", [alias.this])
2826 alias.set("this", None)
2827
2828 columns = alias.args.get("columns") or []
2829 if offset and len(expressions) < len(columns):
2830 offset = columns.pop()
2831
2832 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
2833 self._match(TokenType.ALIAS)
2834 offset = self._parse_id_var(
2835 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
2836 ) or exp.to_identifier("offset")
2837
2838 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
2839
2840 def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
2841 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2842 if not is_derived and not self._match(TokenType.VALUES):
2843 return None
2844
2845 expressions = self._parse_csv(self._parse_value)
2846 alias = self._parse_table_alias()
2847
2848 if is_derived:
2849 self._match_r_paren()
2850
2851 return self.expression(
2852 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
2853 )
2854
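    # e.g. TABLESAMPLE (10 PERCENT), TABLESAMPLE (5 ROWS),
    # TABLESAMPLE (BUCKET 1 OUT OF 4 ON col), or DuckDB's USING SAMPLE (as_modifier)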
2855 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
2856 if not self._match(TokenType.TABLE_SAMPLE) and not (
2857 as_modifier and self._match_text_seq("USING", "SAMPLE")
2858 ):
2859 return None
2860
2861 bucket_numerator = None
2862 bucket_denominator = None
2863 bucket_field = None
2864 percent = None
2865 rows = None
2866 size = None
2867 seed = None
2868
2869 kind = (
2870 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2871 )
2872 method = self._parse_var(tokens=(TokenType.ROW,))
2873
2874 matched_l_paren = self._match(TokenType.L_PAREN)
2875
2876 if self.TABLESAMPLE_CSV:
2877 num = None
2878 expressions = self._parse_csv(self._parse_primary)
2879 else:
2880 expressions = None
2881 num = (
2882 self._parse_factor()
2883 if self._match(TokenType.NUMBER, advance=False)
2884 else self._parse_primary() or self._parse_placeholder()
2885 )
2886
2887 if self._match_text_seq("BUCKET"):
2888 bucket_numerator = self._parse_number()
2889 self._match_text_seq("OUT", "OF")
2890            bucket_denominator = self._parse_number()
2891 self._match(TokenType.ON)
2892 bucket_field = self._parse_field()
2893 elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2894 percent = num
2895 elif self._match(TokenType.ROWS):
2896 rows = num
2897 elif num:
2898 size = num
2899
2900 if matched_l_paren:
2901 self._match_r_paren()
2902
2903 if self._match(TokenType.L_PAREN):
2904 method = self._parse_var()
2905 seed = self._match(TokenType.COMMA) and self._parse_number()
2906 self._match_r_paren()
2907 elif self._match_texts(("SEED", "REPEATABLE")):
2908 seed = self._parse_wrapped(self._parse_number)
2909
2910 return self.expression(
2911 exp.TableSample,
2912 expressions=expressions,
2913 method=method,
2914 bucket_numerator=bucket_numerator,
2915 bucket_denominator=bucket_denominator,
2916 bucket_field=bucket_field,
2917 percent=percent,
2918 rows=rows,
2919 size=size,
2920 seed=seed,
2921 kind=kind,
2922 )
2923
2924 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
2925 return list(iter(self._parse_pivot, None)) or None
2926
2927 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
2928 return list(iter(self._parse_join, None)) or None
2929
2930 # https://duckdb.org/docs/sql/statements/pivot
2931 def _parse_simplified_pivot(self) -> exp.Pivot:
2932 def _parse_on() -> t.Optional[exp.Expression]:
2933 this = self._parse_bitwise()
2934 return self._parse_in(this) if self._match(TokenType.IN) else this
2935
2936 this = self._parse_table()
2937 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
2938 using = self._match(TokenType.USING) and self._parse_csv(
2939 lambda: self._parse_alias(self._parse_function())
2940 )
2941 group = self._parse_group()
2942 return self.expression(
2943 exp.Pivot, this=this, expressions=expressions, using=using, group=group
2944 )
2945
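    # e.g. PIVOT (SUM(x) FOR y IN ('a', 'b')) or
    # UNPIVOT [INCLUDE NULLS | EXCLUDE NULLS] (x FOR y IN (a, b))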
2946 def _parse_pivot(self) -> t.Optional[exp.Pivot]:
2947 index = self._index
2948 include_nulls = None
2949
2950 if self._match(TokenType.PIVOT):
2951 unpivot = False
2952 elif self._match(TokenType.UNPIVOT):
2953 unpivot = True
2954
2955 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
2956 if self._match_text_seq("INCLUDE", "NULLS"):
2957 include_nulls = True
2958 elif self._match_text_seq("EXCLUDE", "NULLS"):
2959 include_nulls = False
2960 else:
2961 return None
2962
2963 expressions = []
2964 field = None
2965
2966 if not self._match(TokenType.L_PAREN):
2967 self._retreat(index)
2968 return None
2969
2970 if unpivot:
2971 expressions = self._parse_csv(self._parse_column)
2972 else:
2973 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))
2974
2975 if not expressions:
2976 self.raise_error("Failed to parse PIVOT's aggregation list")
2977
2978 if not self._match(TokenType.FOR):
2979 self.raise_error("Expecting FOR")
2980
2981 value = self._parse_column()
2982
2983 if not self._match(TokenType.IN):
2984 self.raise_error("Expecting IN")
2985
2986 field = self._parse_in(value, alias=True)
2987
2988 self._match_r_paren()
2989
2990 pivot = self.expression(
2991 exp.Pivot,
2992 expressions=expressions,
2993 field=field,
2994 unpivot=unpivot,
2995 include_nulls=include_nulls,
2996 )
2997
2998 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
2999 pivot.set("alias", self._parse_table_alias())
3000
3001 if not unpivot:
3002 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))
3003
3004 columns: t.List[exp.Expression] = []
3005 for fld in pivot.args["field"].expressions:
3006 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
3007 for name in names:
3008 if self.PREFIXED_PIVOT_COLUMNS:
3009 name = f"{name}_{field_name}" if name else field_name
3010 else:
3011 name = f"{field_name}_{name}" if name else field_name
3012
3013 columns.append(exp.to_identifier(name))
3014
3015 pivot.set("columns", columns)
3016
3017 return pivot
3018
3019 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
3020 return [agg.alias for agg in aggregations]
3021
3022 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
3023 if not skip_where_token and not self._match(TokenType.WHERE):
3024 return None
3025
3026 return self.expression(
3027 exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
3028 )
3029
3030 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
3031 if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
3032 return None
3033
3034 elements = defaultdict(list)
3035
3036 if self._match(TokenType.ALL):
3037 return self.expression(exp.Group, all=True)
3038
3039 while True:
3040 expressions = self._parse_csv(self._parse_conjunction)
3041 if expressions:
3042 elements["expressions"].extend(expressions)
3043
3044 grouping_sets = self._parse_grouping_sets()
3045 if grouping_sets:
3046 elements["grouping_sets"].extend(grouping_sets)
3047
3048 rollup = None
3049 cube = None
3050 totals = None
3051
3052 index = self._index
3053 with_ = self._match(TokenType.WITH)
3054 if self._match(TokenType.ROLLUP):
3055 rollup = with_ or self._parse_wrapped_csv(self._parse_column)
3056 elements["rollup"].extend(ensure_list(rollup))
3057
3058 if self._match(TokenType.CUBE):
3059 cube = with_ or self._parse_wrapped_csv(self._parse_column)
3060 elements["cube"].extend(ensure_list(cube))
3061
3062 if self._match_text_seq("TOTALS"):
3063 totals = True
3064 elements["totals"] = True # type: ignore
3065
3066 if not (grouping_sets or rollup or cube or totals):
3067 if with_:
3068 self._retreat(index)
3069 break
3070
3071 return self.expression(exp.Group, **elements) # type: ignore
3072
3073 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
3074 if not self._match(TokenType.GROUPING_SETS):
3075 return None
3076
3077 return self._parse_wrapped_csv(self._parse_grouping_set)
3078
3079 def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
3080 if self._match(TokenType.L_PAREN):
3081 grouping_set = self._parse_csv(self._parse_column)
3082 self._match_r_paren()
3083 return self.expression(exp.Tuple, expressions=grouping_set)
3084
3085 return self._parse_column()
3086
3087 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
3088 if not skip_having_token and not self._match(TokenType.HAVING):
3089 return None
3090 return self.expression(exp.Having, this=self._parse_conjunction())
3091
3092 def _parse_qualify(self) -> t.Optional[exp.Qualify]:
3093 if not self._match(TokenType.QUALIFY):
3094 return None
3095 return self.expression(exp.Qualify, this=self._parse_conjunction())
3096
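    # Hierarchical queries, e.g. Oracle's START WITH ... CONNECT BY [PRIOR] ...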
3097 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
3098 if skip_start_token:
3099 start = None
3100 elif self._match(TokenType.START_WITH):
3101 start = self._parse_conjunction()
3102 else:
3103 return None
3104
3105 self._match(TokenType.CONNECT_BY)
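        # Temporarily register PRIOR as a no-paren function parser so that it is
        # only recognized while parsing the CONNECT BY condition; removed below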
3106 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
3107 exp.Prior, this=self._parse_bitwise()
3108 )
3109 connect = self._parse_conjunction()
3110 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
3111
3112 if not start and self._match(TokenType.START_WITH):
3113 start = self._parse_conjunction()
3114
3115 return self.expression(exp.Connect, start=start, connect=connect)
3116
3117 def _parse_order(
3118 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
3119 ) -> t.Optional[exp.Expression]:
3120 if not skip_order_token and not self._match(TokenType.ORDER_BY):
3121 return this
3122
3123 return self.expression(
3124 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
3125 )
3126
3127 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
3128 if not self._match(token):
3129 return None
3130 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
3131
3132 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
3133 this = parse_method() if parse_method else self._parse_conjunction()
3134
3135 asc = self._match(TokenType.ASC)
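        # (asc and False) makes desc False rather than None for an explicit ASC,
        # presumably so the generated SQL can preserve the explicit direction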
3136 desc = self._match(TokenType.DESC) or (asc and False)
3137
3138 is_nulls_first = self._match_text_seq("NULLS", "FIRST")
3139 is_nulls_last = self._match_text_seq("NULLS", "LAST")
3140
3141 nulls_first = is_nulls_first or False
3142 explicitly_null_ordered = is_nulls_first or is_nulls_last
3143
3144 if (
3145 not explicitly_null_ordered
3146 and (
3147 (not desc and self.NULL_ORDERING == "nulls_are_small")
3148 or (desc and self.NULL_ORDERING != "nulls_are_small")
3149 )
3150 and self.NULL_ORDERING != "nulls_are_last"
3151 ):
3152 nulls_first = True
3153
3154 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
3155
3156 def _parse_limit(
3157 self, this: t.Optional[exp.Expression] = None, top: bool = False
3158 ) -> t.Optional[exp.Expression]:
3159 if self._match(TokenType.TOP if top else TokenType.LIMIT):
3160 comments = self._prev_comments
3161 if top:
3162 limit_paren = self._match(TokenType.L_PAREN)
3163 expression = self._parse_term() if limit_paren else self._parse_number()
3164
3165 if limit_paren:
3166 self._match_r_paren()
3167 else:
3168 expression = self._parse_term()
3169
3170 if self._match(TokenType.COMMA):
3171 offset = expression
3172 expression = self._parse_term()
3173 else:
3174 offset = None
3175
3176 limit_exp = self.expression(
3177 exp.Limit, this=this, expression=expression, offset=offset, comments=comments
3178 )
3179
3180 return limit_exp
3181
3182 if self._match(TokenType.FETCH):
3183 direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
3184 direction = self._prev.text if direction else "FIRST"
3185
3186 count = self._parse_field(tokens=self.FETCH_TOKENS)
3187 percent = self._match(TokenType.PERCENT)
3188
3189 self._match_set((TokenType.ROW, TokenType.ROWS))
3190
3191 only = self._match_text_seq("ONLY")
3192 with_ties = self._match_text_seq("WITH", "TIES")
3193
3194 if only and with_ties:
3195 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")
3196
3197 return self.expression(
3198 exp.Fetch,
3199 direction=direction,
3200 count=count,
3201 percent=percent,
3202 with_ties=with_ties,
3203 )
3204
3205 return this
3206
3207 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
3208 if not self._match(TokenType.OFFSET):
3209 return this
3210
3211 count = self._parse_term()
3212 self._match_set((TokenType.ROW, TokenType.ROWS))
3213 return self.expression(exp.Offset, this=this, expression=count)
3214
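    # e.g. FOR UPDATE [OF t1, t2] [NOWAIT | WAIT n | SKIP LOCKED]
    # or MySQL's LOCK IN SHARE MODE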
3215 def _parse_locks(self) -> t.List[exp.Lock]:
3216 locks = []
3217 while True:
3218 if self._match_text_seq("FOR", "UPDATE"):
3219 update = True
3220 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
3221 "LOCK", "IN", "SHARE", "MODE"
3222 ):
3223 update = False
3224 else:
3225 break
3226
3227 expressions = None
3228 if self._match_text_seq("OF"):
3229 expressions = self._parse_csv(lambda: self._parse_table(schema=True))
3230
3231 wait: t.Optional[bool | exp.Expression] = None
3232 if self._match_text_seq("NOWAIT"):
3233 wait = True
3234 elif self._match_text_seq("WAIT"):
3235 wait = self._parse_primary()
3236 elif self._match_text_seq("SKIP", "LOCKED"):
3237 wait = False
3238
3239 locks.append(
3240 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
3241 )
3242
3243 return locks
3244
3245 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3246 if not self._match_set(self.SET_OPERATIONS):
3247 return this
3248
3249 token_type = self._prev.token_type
3250
3251 if token_type == TokenType.UNION:
3252 expression = exp.Union
3253 elif token_type == TokenType.EXCEPT:
3254 expression = exp.Except
3255 else:
3256 expression = exp.Intersect
3257
3258 return self.expression(
3259 expression,
3260 comments=self._prev.comments,
3261 this=this,
3262 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
3263 by_name=self._match_text_seq("BY", "NAME"),
3264 expression=self._parse_set_operations(self._parse_select(nested=True)),
3265 )
3266
3267 def _parse_expression(self) -> t.Optional[exp.Expression]:
3268 return self._parse_alias(self._parse_conjunction())
3269
3270 def _parse_conjunction(self) -> t.Optional[exp.Expression]:
3271 return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
3272
3273 def _parse_equality(self) -> t.Optional[exp.Expression]:
3274 return self._parse_tokens(self._parse_comparison, self.EQUALITY)
3275
3276 def _parse_comparison(self) -> t.Optional[exp.Expression]:
3277 return self._parse_tokens(self._parse_range, self.COMPARISON)
3278
3279 def _parse_range(self) -> t.Optional[exp.Expression]:
3280 this = self._parse_bitwise()
3281 negate = self._match(TokenType.NOT)
3282
3283 if self._match_set(self.RANGE_PARSERS):
3284 expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
3285 if not expression:
3286 return this
3287
3288 this = expression
3289 elif self._match(TokenType.ISNULL):
3290 this = self.expression(exp.Is, this=this, expression=exp.Null())
3291
3292 # Postgres supports ISNULL and NOTNULL for conditions.
3293 # https://blog.andreiavram.ro/postgresql-null-composite-type/
3294 if self._match(TokenType.NOTNULL):
3295 this = self.expression(exp.Is, this=this, expression=exp.Null())
3296 this = self.expression(exp.Not, this=this)
3297
3298 if negate:
3299 this = self.expression(exp.Not, this=this)
3300
3301 if self._match(TokenType.IS):
3302 this = self._parse_is(this)
3303
3304 return this
3305
3306 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3307 index = self._index - 1
3308 negate = self._match(TokenType.NOT)
3309
3310 if self._match_text_seq("DISTINCT", "FROM"):
3311 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
3312 return self.expression(klass, this=this, expression=self._parse_conjunction())
3313
3314 expression = self._parse_null() or self._parse_boolean()
3315 if not expression:
3316 self._retreat(index)
3317 return None
3318
3319 this = self.expression(exp.Is, this=this, expression=expression)
3320 return self.expression(exp.Not, this=this) if negate else this
3321
3322 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
3323 unnest = self._parse_unnest(with_alias=False)
3324 if unnest:
3325 this = self.expression(exp.In, this=this, unnest=unnest)
3326 elif self._match(TokenType.L_PAREN):
3327 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
3328
3329 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
3330 this = self.expression(exp.In, this=this, query=expressions[0])
3331 else:
3332 this = self.expression(exp.In, this=this, expressions=expressions)
3333
3334 self._match_r_paren(this)
3335 else:
3336 this = self.expression(exp.In, this=this, field=self._parse_field())
3337
3338 return this
3339
3340 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
3341 low = self._parse_bitwise()
3342 self._match(TokenType.AND)
3343 high = self._parse_bitwise()
3344 return self.expression(exp.Between, this=this, low=low, high=high)
3345
3346 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3347 if not self._match(TokenType.ESCAPE):
3348 return this
3349 return self.expression(exp.Escape, this=this, expression=self._parse_string())
3350
3351 def _parse_interval(self) -> t.Optional[exp.Interval]:
3352 index = self._index
3353
3354 if not self._match(TokenType.INTERVAL):
3355 return None
3356
3357 if self._match(TokenType.STRING, advance=False):
3358 this = self._parse_primary()
3359 else:
3360 this = self._parse_term()
3361
3362 if not this:
3363 self._retreat(index)
3364 return None
3365
3366 unit = self._parse_function() or self._parse_var(any_token=True)
3367
3368        # Most dialects support the form INTERVAL '5' DAY, so each INTERVAL
3369        # expression is parsed into this canonical form to make transpilation easier
3370 if this and this.is_number:
3371 this = exp.Literal.string(this.name)
3372 elif this and this.is_string:
3373 parts = this.name.split()
3374
3375 if len(parts) == 2:
3376 if unit:
3377 # This is not actually a unit, it's something else (e.g. a "window side")
3378 unit = None
3379 self._retreat(self._index - 1)
3380
3381 this = exp.Literal.string(parts[0])
3382 unit = self.expression(exp.Var, this=parts[1])
3383
3384 return self.expression(exp.Interval, this=this, unit=unit)
3385
3386 def _parse_bitwise(self) -> t.Optional[exp.Expression]:
3387 this = self._parse_term()
3388
3389 while True:
3390 if self._match_set(self.BITWISE):
3391 this = self.expression(
3392 self.BITWISE[self._prev.token_type],
3393 this=this,
3394 expression=self._parse_term(),
3395 )
3396 elif self._match(TokenType.DQMARK):
3397 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
3398 elif self._match_pair(TokenType.LT, TokenType.LT):
3399 this = self.expression(
3400 exp.BitwiseLeftShift, this=this, expression=self._parse_term()
3401 )
3402 elif self._match_pair(TokenType.GT, TokenType.GT):
3403 this = self.expression(
3404 exp.BitwiseRightShift, this=this, expression=self._parse_term()
3405 )
3406 else:
3407 break
3408
3409 return this
3410
3411 def _parse_term(self) -> t.Optional[exp.Expression]:
3412 return self._parse_tokens(self._parse_factor, self.TERM)
3413
3414 def _parse_factor(self) -> t.Optional[exp.Expression]:
3415 if self.EXPONENT:
3416 factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
3417 else:
3418 factor = self._parse_tokens(self._parse_unary, self.FACTOR)
3419 if isinstance(factor, exp.Div):
3420 factor.args["typed"] = self.TYPED_DIVISION
3421 factor.args["safe"] = self.SAFE_DIVISION
3422 return factor
3423
    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

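    # Also covers typed literals: for input like `DATE '2020-01-01'`, a
    # matched type followed by a literal becomes a Cast, or whatever the
    # dialect's TYPE_LITERAL_PARSERS entry produces for that type.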
    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

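    # Parses a single argument of a parameterized type, e.g. the `10` in
    # VARCHAR(10). The optional trailing var presumably captures qualifiers
    # such as Oracle's `VARCHAR2(10 BYTE)` (an assumption, not verified here).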
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

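    # Accepts both parenthesized and angle-bracketed forms, e.g.
    # `DECIMAL(10, 2)`, `ARRAY<INT>`, `STRUCT<a INT>`, plus suffixes such as
    # `TIMESTAMP WITH TIME ZONE`, `INT UNSIGNED` and trailing `[]` array
    # markers. With check_func=True, a parenthesized type (which could just
    # as well be a function call) is only kept if a string literal follows,
    # as in the typed-literal form `CHAR(10) 'foo'`.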
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

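    # One struct field, with an optional colon separator, so both
    # `STRUCT<a INT>` and the Hive/Spark-style `STRUCT<a: INT>` reduce to
    # the same column definition.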
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

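    # Applies postfix column operators in a loop, e.g. `x::INT` (DCOLON
    # cast), dotted member access `a.b.c`, and any dialect-specific
    # COLUMN_OPERATORS entries (JSON arrows and the like), interleaved with
    # bracket subscripts.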
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # BigQuery allows function calls like x.y.count(...) and SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

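    # Literals and parenthesized expressions. Notable cases: adjacent string
    # literals are folded into a single Concat (`'a' 'b'`), a leading-dot
    # number like `.5` becomes 0.5, and `( ... )` may end up as a Subquery,
    # a Tuple or a Paren node depending on its contents.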
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

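    # Fallback chain: a primary (literal/paren), then a function call, then
    # a plain identifier; the flags just widen what the identifier and
    # function steps will accept.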
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

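    # Example of the brace-escaped form handled below: `{fn UCASE('x')}`
    # parses the inner call as usual, with the surrounding braces consumed
    # around it.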
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

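    # Dispatch order: NO_PAREN_FUNCTION_PARSERS, then NO_PAREN_FUNCTIONS
    # (CURRENT_DATE and friends), then FUNCTION_PARSERS, subquery predicates
    # such as EXISTS(SELECT ...), known FUNCTIONS builders, and finally a
    # generic exp.Anonymous node for unrecognized calls.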
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

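    # Charset introducers, e.g. MySQL's `_utf8mb4'abc'`: the introducer
    # token plus a following literal become exp.Introducer; with no literal,
    # the token degrades to a plain Identifier.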
    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

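    # E.g. `@@max_connections` -> SessionParameter(this=max_connections),
    # while the dotted form `@@SESSION.sql_mode` puts `SESSION` into `kind`
    # (assuming the dialect tokenizes `@@` as a session parameter).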
    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

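    # Lambdas first: `(x, y) -> x + y` or `x -> x + 1` (the LAMBDAS token
    # set); if no lambda operator follows, the parser backtracks and treats
    # the input as an ordinary expression.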
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]