1from __future__ import annotations
2
3import logging
4import typing as t
5from collections import defaultdict
6
7from sqlglot import exp
8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
9from sqlglot.helper import apply_index_offset, ensure_list, seq_get
10from sqlglot.time import format_time
11from sqlglot.tokens import Token, Tokenizer, TokenType
12from sqlglot.trie import TrieResult, in_trie, new_trie
13
14if t.TYPE_CHECKING:
15 from sqlglot._typing import E, Lit
16 from sqlglot.dialects.dialect import Dialect, DialectType
17
18 T = t.TypeVar("T")
19 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)
20
21logger = logging.getLogger("sqlglot")
22
23OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]
24
25
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
27 if len(args) == 1 and args[0].is_star:
28 return exp.StarMap(this=args[0])
29
30 keys = []
31 values = []
32 for i in range(0, len(args), 2):
33 keys.append(args[i])
34 values.append(args[i + 1])
35
36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
37
38
39def build_like(args: t.List) -> exp.Escape | exp.Like:
40 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
41 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
42
43
44def binary_range_parser(
45 expr_type: t.Type[exp.Expression], reverse_args: bool = False
46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
47 def _parse_binary_range(
48 self: Parser, this: t.Optional[exp.Expression]
49 ) -> t.Optional[exp.Expression]:
50 expression = self._parse_bitwise()
51 if reverse_args:
52 this, expression = expression, this
53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression))
54
55 return _parse_binary_range
56
57
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
59 # Default argument order is base, expression
60 this = seq_get(args, 0)
61 expression = seq_get(args, 1)
62
63 if expression:
64 if not dialect.LOG_BASE_FIRST:
65 this, expression = expression, this
66 return exp.Log(this=this, expression=expression)
67
68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
69
70
71def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
72 arg = seq_get(args, 0)
73 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)
74
75
76def build_lower(args: t.List) -> exp.Lower | exp.Hex:
77 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
78 arg = seq_get(args, 0)
79 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)
80
81
82def build_upper(args: t.List) -> exp.Upper | exp.Hex:
83 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
84 arg = seq_get(args, 0)
85 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)
86
87
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
89 def _builder(args: t.List, dialect: Dialect) -> E:
90 expression = expr_type(
91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
92 )
93 if len(args) > 2 and expr_type is exp.JSONExtract:
94 expression.set("expressions", args[2:])
95
96 return expression
97
98 return _builder
99
100
101def build_mod(args: t.List) -> exp.Mod:
102 this = seq_get(args, 0)
103 expression = seq_get(args, 1)
104
105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression
108
109 return exp.Mod(this=this, expression=expression)
110
111
112def build_pad(args: t.List, is_left: bool = True):
113 return exp.Pad(
114 this=seq_get(args, 0),
115 expression=seq_get(args, 1),
116 fill_pattern=seq_get(args, 2),
117 is_left=is_left,
118 )
119
120
121def build_array_constructor(
122 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
123) -> exp.Expression:
124 array_exp = exp_class(expressions=args)
125
126 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
127 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)
128
129 return array_exp
130
131
132def build_convert_timezone(
133 args: t.List, default_source_tz: t.Optional[str] = None
134) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
135 if len(args) == 2:
136 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
137 return exp.ConvertTimezone(
138 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
139 )
140
141 return exp.ConvertTimezone.from_arg_list(args)
142
143
144def build_trim(args: t.List, is_left: bool = True):
145 return exp.Trim(
146 this=seq_get(args, 0),
147 expression=seq_get(args, 1),
148 position="LEADING" if is_left else "TRAILING",
149 )
150
151
152def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
153 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)
154
155
156def build_locate_strposition(args: t.List):
157 return exp.StrPosition(
158 this=seq_get(args, 1),
159 substr=seq_get(args, 0),
160 position=seq_get(args, 2),
161 )
162
163
164class _Parser(type):
165 def __new__(cls, clsname, bases, attrs):
166 klass = super().__new__(cls, clsname, bases, attrs)
167
168 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
169 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)
170
171 return klass
172
173
174class Parser(metaclass=_Parser):
175 """
176 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
177
178 Args:
179 error_level: The desired error level.
180 Default: ErrorLevel.IMMEDIATE
181 error_message_context: The amount of context to capture from a query string when displaying
182 the error message (in number of characters).
183 Default: 100
184 max_errors: Maximum number of error messages to include in a raised ParseError.
185 This is only relevant if error_level is ErrorLevel.RAISE.
186 Default: 3
187 """
188
189 FUNCTIONS: t.Dict[str, t.Callable] = {
190 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
191 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
192 "ARRAY": lambda args, dialect: exp.Array(expressions=args),
193 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
194 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
195 ),
196 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
197 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
198 ),
199 "CHAR": lambda args: exp.Chr(expressions=args),
200 "CHR": lambda args: exp.Chr(expressions=args),
201 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
202 "CONCAT": lambda args, dialect: exp.Concat(
203 expressions=args,
204 safe=not dialect.STRICT_STRING_CONCAT,
205 coalesce=dialect.CONCAT_COALESCE,
206 ),
207 "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
208 expressions=args,
209 safe=not dialect.STRICT_STRING_CONCAT,
210 coalesce=dialect.CONCAT_COALESCE,
211 ),
212 "CONVERT_TIMEZONE": build_convert_timezone,
213 "DATE_TO_DATE_STR": lambda args: exp.Cast(
214 this=seq_get(args, 0),
215 to=exp.DataType(this=exp.DataType.Type.TEXT),
216 ),
217 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
218 start=seq_get(args, 0),
219 end=seq_get(args, 1),
220 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
221 ),
222 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
223 "HEX": build_hex,
224 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
225 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
226 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
227 "LIKE": build_like,
228 "LOG": build_logarithm,
229 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
230 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
231 "LOWER": build_lower,
232 "LPAD": lambda args: build_pad(args),
233 "LEFTPAD": lambda args: build_pad(args),
234 "LTRIM": lambda args: build_trim(args),
235 "MOD": build_mod,
236 "RIGHTPAD": lambda args: build_pad(args, is_left=False),
237 "RPAD": lambda args: build_pad(args, is_left=False),
238 "RTRIM": lambda args: build_trim(args, is_left=False),
239 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
240 if len(args) != 2
241 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
242 "STRPOS": exp.StrPosition.from_arg_list,
243 "CHARINDEX": lambda args: build_locate_strposition(args),
244 "INSTR": exp.StrPosition.from_arg_list,
245 "LOCATE": lambda args: build_locate_strposition(args),
246 "TIME_TO_TIME_STR": lambda args: exp.Cast(
247 this=seq_get(args, 0),
248 to=exp.DataType(this=exp.DataType.Type.TEXT),
249 ),
250 "TO_HEX": build_hex,
251 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
252 this=exp.Cast(
253 this=seq_get(args, 0),
254 to=exp.DataType(this=exp.DataType.Type.TEXT),
255 ),
256 start=exp.Literal.number(1),
257 length=exp.Literal.number(10),
258 ),
259 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
260 "UPPER": build_upper,
261 "VAR_MAP": build_var_map,
262 }
263
264 NO_PAREN_FUNCTIONS = {
265 TokenType.CURRENT_DATE: exp.CurrentDate,
266 TokenType.CURRENT_DATETIME: exp.CurrentDate,
267 TokenType.CURRENT_TIME: exp.CurrentTime,
268 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
269 TokenType.CURRENT_USER: exp.CurrentUser,
270 }
271
272 STRUCT_TYPE_TOKENS = {
273 TokenType.NESTED,
274 TokenType.OBJECT,
275 TokenType.STRUCT,
276 TokenType.UNION,
277 }
278
279 NESTED_TYPE_TOKENS = {
280 TokenType.ARRAY,
281 TokenType.LIST,
282 TokenType.LOWCARDINALITY,
283 TokenType.MAP,
284 TokenType.NULLABLE,
285 TokenType.RANGE,
286 *STRUCT_TYPE_TOKENS,
287 }
288
289 ENUM_TYPE_TOKENS = {
290 TokenType.DYNAMIC,
291 TokenType.ENUM,
292 TokenType.ENUM8,
293 TokenType.ENUM16,
294 }
295
296 AGGREGATE_TYPE_TOKENS = {
297 TokenType.AGGREGATEFUNCTION,
298 TokenType.SIMPLEAGGREGATEFUNCTION,
299 }
300
301 TYPE_TOKENS = {
302 TokenType.BIT,
303 TokenType.BOOLEAN,
304 TokenType.TINYINT,
305 TokenType.UTINYINT,
306 TokenType.SMALLINT,
307 TokenType.USMALLINT,
308 TokenType.INT,
309 TokenType.UINT,
310 TokenType.BIGINT,
311 TokenType.UBIGINT,
312 TokenType.INT128,
313 TokenType.UINT128,
314 TokenType.INT256,
315 TokenType.UINT256,
316 TokenType.MEDIUMINT,
317 TokenType.UMEDIUMINT,
318 TokenType.FIXEDSTRING,
319 TokenType.FLOAT,
320 TokenType.DOUBLE,
321 TokenType.CHAR,
322 TokenType.NCHAR,
323 TokenType.VARCHAR,
324 TokenType.NVARCHAR,
325 TokenType.BPCHAR,
326 TokenType.TEXT,
327 TokenType.MEDIUMTEXT,
328 TokenType.LONGTEXT,
329 TokenType.MEDIUMBLOB,
330 TokenType.LONGBLOB,
331 TokenType.BINARY,
332 TokenType.VARBINARY,
333 TokenType.JSON,
334 TokenType.JSONB,
335 TokenType.INTERVAL,
336 TokenType.TINYBLOB,
337 TokenType.TINYTEXT,
338 TokenType.TIME,
339 TokenType.TIMETZ,
340 TokenType.TIMESTAMP,
341 TokenType.TIMESTAMP_S,
342 TokenType.TIMESTAMP_MS,
343 TokenType.TIMESTAMP_NS,
344 TokenType.TIMESTAMPTZ,
345 TokenType.TIMESTAMPLTZ,
346 TokenType.TIMESTAMPNTZ,
347 TokenType.DATETIME,
348 TokenType.DATETIME2,
349 TokenType.DATETIME64,
350 TokenType.SMALLDATETIME,
351 TokenType.DATE,
352 TokenType.DATE32,
353 TokenType.INT4RANGE,
354 TokenType.INT4MULTIRANGE,
355 TokenType.INT8RANGE,
356 TokenType.INT8MULTIRANGE,
357 TokenType.NUMRANGE,
358 TokenType.NUMMULTIRANGE,
359 TokenType.TSRANGE,
360 TokenType.TSMULTIRANGE,
361 TokenType.TSTZRANGE,
362 TokenType.TSTZMULTIRANGE,
363 TokenType.DATERANGE,
364 TokenType.DATEMULTIRANGE,
365 TokenType.DECIMAL,
366 TokenType.DECIMAL32,
367 TokenType.DECIMAL64,
368 TokenType.DECIMAL128,
369 TokenType.DECIMAL256,
370 TokenType.UDECIMAL,
371 TokenType.BIGDECIMAL,
372 TokenType.UUID,
373 TokenType.GEOGRAPHY,
374 TokenType.GEOMETRY,
375 TokenType.POINT,
376 TokenType.RING,
377 TokenType.LINESTRING,
378 TokenType.MULTILINESTRING,
379 TokenType.POLYGON,
380 TokenType.MULTIPOLYGON,
381 TokenType.HLLSKETCH,
382 TokenType.HSTORE,
383 TokenType.PSEUDO_TYPE,
384 TokenType.SUPER,
385 TokenType.SERIAL,
386 TokenType.SMALLSERIAL,
387 TokenType.BIGSERIAL,
388 TokenType.XML,
389 TokenType.YEAR,
390 TokenType.UNIQUEIDENTIFIER,
391 TokenType.USERDEFINED,
392 TokenType.MONEY,
393 TokenType.SMALLMONEY,
394 TokenType.ROWVERSION,
395 TokenType.IMAGE,
396 TokenType.VARIANT,
397 TokenType.VECTOR,
398 TokenType.OBJECT,
399 TokenType.OBJECT_IDENTIFIER,
400 TokenType.INET,
401 TokenType.IPADDRESS,
402 TokenType.IPPREFIX,
403 TokenType.IPV4,
404 TokenType.IPV6,
405 TokenType.UNKNOWN,
406 TokenType.NULL,
407 TokenType.NAME,
408 TokenType.TDIGEST,
409 TokenType.DYNAMIC,
410 *ENUM_TYPE_TOKENS,
411 *NESTED_TYPE_TOKENS,
412 *AGGREGATE_TYPE_TOKENS,
413 }
414
415 SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
416 TokenType.BIGINT: TokenType.UBIGINT,
417 TokenType.INT: TokenType.UINT,
418 TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
419 TokenType.SMALLINT: TokenType.USMALLINT,
420 TokenType.TINYINT: TokenType.UTINYINT,
421 TokenType.DECIMAL: TokenType.UDECIMAL,
422 }
423
424 SUBQUERY_PREDICATES = {
425 TokenType.ANY: exp.Any,
426 TokenType.ALL: exp.All,
427 TokenType.EXISTS: exp.Exists,
428 TokenType.SOME: exp.Any,
429 }
430
431 RESERVED_TOKENS = {
432 *Tokenizer.SINGLE_TOKENS.values(),
433 TokenType.SELECT,
434 } - {TokenType.IDENTIFIER}
435
436 DB_CREATABLES = {
437 TokenType.DATABASE,
438 TokenType.DICTIONARY,
439 TokenType.MODEL,
440 TokenType.NAMESPACE,
441 TokenType.SCHEMA,
442 TokenType.SEQUENCE,
443 TokenType.SINK,
444 TokenType.SOURCE,
445 TokenType.STORAGE_INTEGRATION,
446 TokenType.STREAMLIT,
447 TokenType.TABLE,
448 TokenType.TAG,
449 TokenType.VIEW,
450 TokenType.WAREHOUSE,
451 }
452
453 CREATABLES = {
454 TokenType.COLUMN,
455 TokenType.CONSTRAINT,
456 TokenType.FOREIGN_KEY,
457 TokenType.FUNCTION,
458 TokenType.INDEX,
459 TokenType.PROCEDURE,
460 *DB_CREATABLES,
461 }
462
463 ALTERABLES = {
464 TokenType.INDEX,
465 TokenType.TABLE,
466 TokenType.VIEW,
467 }
468
469 # Tokens that can represent identifiers
470 ID_VAR_TOKENS = {
471 TokenType.ALL,
472 TokenType.ATTACH,
473 TokenType.VAR,
474 TokenType.ANTI,
475 TokenType.APPLY,
476 TokenType.ASC,
477 TokenType.ASOF,
478 TokenType.AUTO_INCREMENT,
479 TokenType.BEGIN,
480 TokenType.BPCHAR,
481 TokenType.CACHE,
482 TokenType.CASE,
483 TokenType.COLLATE,
484 TokenType.COMMAND,
485 TokenType.COMMENT,
486 TokenType.COMMIT,
487 TokenType.CONSTRAINT,
488 TokenType.COPY,
489 TokenType.CUBE,
490 TokenType.CURRENT_SCHEMA,
491 TokenType.DEFAULT,
492 TokenType.DELETE,
493 TokenType.DESC,
494 TokenType.DESCRIBE,
495 TokenType.DETACH,
496 TokenType.DICTIONARY,
497 TokenType.DIV,
498 TokenType.END,
499 TokenType.EXECUTE,
500 TokenType.EXPORT,
501 TokenType.ESCAPE,
502 TokenType.FALSE,
503 TokenType.FIRST,
504 TokenType.FILTER,
505 TokenType.FINAL,
506 TokenType.FORMAT,
507 TokenType.FULL,
508 TokenType.IDENTIFIER,
509 TokenType.IS,
510 TokenType.ISNULL,
511 TokenType.INTERVAL,
512 TokenType.KEEP,
513 TokenType.KILL,
514 TokenType.LEFT,
515 TokenType.LIMIT,
516 TokenType.LOAD,
517 TokenType.MERGE,
518 TokenType.NATURAL,
519 TokenType.NEXT,
520 TokenType.OFFSET,
521 TokenType.OPERATOR,
522 TokenType.ORDINALITY,
523 TokenType.OVERLAPS,
524 TokenType.OVERWRITE,
525 TokenType.PARTITION,
526 TokenType.PERCENT,
527 TokenType.PIVOT,
528 TokenType.PRAGMA,
529 TokenType.RANGE,
530 TokenType.RECURSIVE,
531 TokenType.REFERENCES,
532 TokenType.REFRESH,
533 TokenType.RENAME,
534 TokenType.REPLACE,
535 TokenType.RIGHT,
536 TokenType.ROLLUP,
537 TokenType.ROW,
538 TokenType.ROWS,
539 TokenType.SEMI,
540 TokenType.SET,
541 TokenType.SETTINGS,
542 TokenType.SHOW,
543 TokenType.TEMPORARY,
544 TokenType.TOP,
545 TokenType.TRUE,
546 TokenType.TRUNCATE,
547 TokenType.UNIQUE,
548 TokenType.UNNEST,
549 TokenType.UNPIVOT,
550 TokenType.UPDATE,
551 TokenType.USE,
552 TokenType.VOLATILE,
553 TokenType.WINDOW,
554 *CREATABLES,
555 *SUBQUERY_PREDICATES,
556 *TYPE_TOKENS,
557 *NO_PAREN_FUNCTIONS,
558 }
559 ID_VAR_TOKENS.remove(TokenType.UNION)
560
561 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
562 TokenType.ANTI,
563 TokenType.APPLY,
564 TokenType.ASOF,
565 TokenType.FULL,
566 TokenType.LEFT,
567 TokenType.LOCK,
568 TokenType.NATURAL,
569 TokenType.RIGHT,
570 TokenType.SEMI,
571 TokenType.WINDOW,
572 }
573
574 ALIAS_TOKENS = ID_VAR_TOKENS
575
576 ARRAY_CONSTRUCTORS = {
577 "ARRAY": exp.Array,
578 "LIST": exp.List,
579 }
580
581 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
582
583 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
584
585 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
586
587 FUNC_TOKENS = {
588 TokenType.COLLATE,
589 TokenType.COMMAND,
590 TokenType.CURRENT_DATE,
591 TokenType.CURRENT_DATETIME,
592 TokenType.CURRENT_SCHEMA,
593 TokenType.CURRENT_TIMESTAMP,
594 TokenType.CURRENT_TIME,
595 TokenType.CURRENT_USER,
596 TokenType.FILTER,
597 TokenType.FIRST,
598 TokenType.FORMAT,
599 TokenType.GLOB,
600 TokenType.IDENTIFIER,
601 TokenType.INDEX,
602 TokenType.ISNULL,
603 TokenType.ILIKE,
604 TokenType.INSERT,
605 TokenType.LIKE,
606 TokenType.MERGE,
607 TokenType.NEXT,
608 TokenType.OFFSET,
609 TokenType.PRIMARY_KEY,
610 TokenType.RANGE,
611 TokenType.REPLACE,
612 TokenType.RLIKE,
613 TokenType.ROW,
614 TokenType.UNNEST,
615 TokenType.VAR,
616 TokenType.LEFT,
617 TokenType.RIGHT,
618 TokenType.SEQUENCE,
619 TokenType.DATE,
620 TokenType.DATETIME,
621 TokenType.TABLE,
622 TokenType.TIMESTAMP,
623 TokenType.TIMESTAMPTZ,
624 TokenType.TRUNCATE,
625 TokenType.WINDOW,
626 TokenType.XOR,
627 *TYPE_TOKENS,
628 *SUBQUERY_PREDICATES,
629 }
630
631 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
632 TokenType.AND: exp.And,
633 }
634
635 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
636 TokenType.COLON_EQ: exp.PropertyEQ,
637 }
638
639 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
640 TokenType.OR: exp.Or,
641 }
642
643 EQUALITY = {
644 TokenType.EQ: exp.EQ,
645 TokenType.NEQ: exp.NEQ,
646 TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
647 }
648
649 COMPARISON = {
650 TokenType.GT: exp.GT,
651 TokenType.GTE: exp.GTE,
652 TokenType.LT: exp.LT,
653 TokenType.LTE: exp.LTE,
654 }
655
656 BITWISE = {
657 TokenType.AMP: exp.BitwiseAnd,
658 TokenType.CARET: exp.BitwiseXor,
659 TokenType.PIPE: exp.BitwiseOr,
660 }
661
662 TERM = {
663 TokenType.DASH: exp.Sub,
664 TokenType.PLUS: exp.Add,
665 TokenType.MOD: exp.Mod,
666 TokenType.COLLATE: exp.Collate,
667 }
668
669 FACTOR = {
670 TokenType.DIV: exp.IntDiv,
671 TokenType.LR_ARROW: exp.Distance,
672 TokenType.SLASH: exp.Div,
673 TokenType.STAR: exp.Mul,
674 }
675
676 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
677
678 TIMES = {
679 TokenType.TIME,
680 TokenType.TIMETZ,
681 }
682
683 TIMESTAMPS = {
684 TokenType.TIMESTAMP,
685 TokenType.TIMESTAMPTZ,
686 TokenType.TIMESTAMPLTZ,
687 *TIMES,
688 }
689
690 SET_OPERATIONS = {
691 TokenType.UNION,
692 TokenType.INTERSECT,
693 TokenType.EXCEPT,
694 }
695
696 JOIN_METHODS = {
697 TokenType.ASOF,
698 TokenType.NATURAL,
699 TokenType.POSITIONAL,
700 }
701
702 JOIN_SIDES = {
703 TokenType.LEFT,
704 TokenType.RIGHT,
705 TokenType.FULL,
706 }
707
708 JOIN_KINDS = {
709 TokenType.ANTI,
710 TokenType.CROSS,
711 TokenType.INNER,
712 TokenType.OUTER,
713 TokenType.SEMI,
714 TokenType.STRAIGHT_JOIN,
715 }
716
717 JOIN_HINTS: t.Set[str] = set()
718
719 LAMBDAS = {
720 TokenType.ARROW: lambda self, expressions: self.expression(
721 exp.Lambda,
722 this=self._replace_lambda(
723 self._parse_assignment(),
724 expressions,
725 ),
726 expressions=expressions,
727 ),
728 TokenType.FARROW: lambda self, expressions: self.expression(
729 exp.Kwarg,
730 this=exp.var(expressions[0].name),
731 expression=self._parse_assignment(),
732 ),
733 }
734
735 COLUMN_OPERATORS = {
736 TokenType.DOT: None,
737 TokenType.DCOLON: lambda self, this, to: self.expression(
738 exp.Cast if self.STRICT_CAST else exp.TryCast,
739 this=this,
740 to=to,
741 ),
742 TokenType.ARROW: lambda self, this, path: self.expression(
743 exp.JSONExtract,
744 this=this,
745 expression=self.dialect.to_json_path(path),
746 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
747 ),
748 TokenType.DARROW: lambda self, this, path: self.expression(
749 exp.JSONExtractScalar,
750 this=this,
751 expression=self.dialect.to_json_path(path),
752 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
753 ),
754 TokenType.HASH_ARROW: lambda self, this, path: self.expression(
755 exp.JSONBExtract,
756 this=this,
757 expression=path,
758 ),
759 TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
760 exp.JSONBExtractScalar,
761 this=this,
762 expression=path,
763 ),
764 TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
765 exp.JSONBContains,
766 this=this,
767 expression=key,
768 ),
769 }
770
771 EXPRESSION_PARSERS = {
772 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
773 exp.Column: lambda self: self._parse_column(),
774 exp.Condition: lambda self: self._parse_assignment(),
775 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
776 exp.Expression: lambda self: self._parse_expression(),
777 exp.From: lambda self: self._parse_from(joins=True),
778 exp.Group: lambda self: self._parse_group(),
779 exp.Having: lambda self: self._parse_having(),
780 exp.Hint: lambda self: self._parse_hint_body(),
781 exp.Identifier: lambda self: self._parse_id_var(),
782 exp.Join: lambda self: self._parse_join(),
783 exp.Lambda: lambda self: self._parse_lambda(),
784 exp.Lateral: lambda self: self._parse_lateral(),
785 exp.Limit: lambda self: self._parse_limit(),
786 exp.Offset: lambda self: self._parse_offset(),
787 exp.Order: lambda self: self._parse_order(),
788 exp.Ordered: lambda self: self._parse_ordered(),
789 exp.Properties: lambda self: self._parse_properties(),
790 exp.Qualify: lambda self: self._parse_qualify(),
791 exp.Returning: lambda self: self._parse_returning(),
792 exp.Select: lambda self: self._parse_select(),
793 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
794 exp.Table: lambda self: self._parse_table_parts(),
795 exp.TableAlias: lambda self: self._parse_table_alias(),
796 exp.Tuple: lambda self: self._parse_value(),
797 exp.Whens: lambda self: self._parse_when_matched(),
798 exp.Where: lambda self: self._parse_where(),
799 exp.Window: lambda self: self._parse_named_window(),
800 exp.With: lambda self: self._parse_with(),
801 "JOIN_TYPE": lambda self: self._parse_join_parts(),
802 }
803
804 STATEMENT_PARSERS = {
805 TokenType.ALTER: lambda self: self._parse_alter(),
806 TokenType.ANALYZE: lambda self: self._parse_analyze(),
807 TokenType.BEGIN: lambda self: self._parse_transaction(),
808 TokenType.CACHE: lambda self: self._parse_cache(),
809 TokenType.COMMENT: lambda self: self._parse_comment(),
810 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
811 TokenType.COPY: lambda self: self._parse_copy(),
812 TokenType.CREATE: lambda self: self._parse_create(),
813 TokenType.DELETE: lambda self: self._parse_delete(),
814 TokenType.DESC: lambda self: self._parse_describe(),
815 TokenType.DESCRIBE: lambda self: self._parse_describe(),
816 TokenType.DROP: lambda self: self._parse_drop(),
817 TokenType.GRANT: lambda self: self._parse_grant(),
818 TokenType.INSERT: lambda self: self._parse_insert(),
819 TokenType.KILL: lambda self: self._parse_kill(),
820 TokenType.LOAD: lambda self: self._parse_load(),
821 TokenType.MERGE: lambda self: self._parse_merge(),
822 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
823 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
824 TokenType.REFRESH: lambda self: self._parse_refresh(),
825 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
826 TokenType.SET: lambda self: self._parse_set(),
827 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
828 TokenType.UNCACHE: lambda self: self._parse_uncache(),
829 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
830 TokenType.UPDATE: lambda self: self._parse_update(),
831 TokenType.USE: lambda self: self.expression(
832 exp.Use,
833 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
834 this=self._parse_table(schema=False),
835 ),
836 TokenType.SEMICOLON: lambda self: exp.Semicolon(),
837 }
838
839 UNARY_PARSERS = {
840 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op
841 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
842 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
843 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
844 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
845 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
846 }
847
848 STRING_PARSERS = {
849 TokenType.HEREDOC_STRING: lambda self, token: self.expression(
850 exp.RawString, this=token.text
851 ),
852 TokenType.NATIONAL_STRING: lambda self, token: self.expression(
853 exp.National, this=token.text
854 ),
855 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
856 TokenType.STRING: lambda self, token: self.expression(
857 exp.Literal, this=token.text, is_string=True
858 ),
859 TokenType.UNICODE_STRING: lambda self, token: self.expression(
860 exp.UnicodeString,
861 this=token.text,
862 escape=self._match_text_seq("UESCAPE") and self._parse_string(),
863 ),
864 }
865
866 NUMERIC_PARSERS = {
867 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
868 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
869 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
870 TokenType.NUMBER: lambda self, token: self.expression(
871 exp.Literal, this=token.text, is_string=False
872 ),
873 }
874
875 PRIMARY_PARSERS = {
876 **STRING_PARSERS,
877 **NUMERIC_PARSERS,
878 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
879 TokenType.NULL: lambda self, _: self.expression(exp.Null),
880 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
881 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
882 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
883 TokenType.STAR: lambda self, _: self._parse_star_ops(),
884 }
885
886 PLACEHOLDER_PARSERS = {
887 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
888 TokenType.PARAMETER: lambda self: self._parse_parameter(),
889 TokenType.COLON: lambda self: (
890 self.expression(exp.Placeholder, this=self._prev.text)
891 if self._match_set(self.ID_VAR_TOKENS)
892 else None
893 ),
894 }
895
896 RANGE_PARSERS = {
897 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
898 TokenType.BETWEEN: lambda self, this: self._parse_between(this),
899 TokenType.GLOB: binary_range_parser(exp.Glob),
900 TokenType.ILIKE: binary_range_parser(exp.ILike),
901 TokenType.IN: lambda self, this: self._parse_in(this),
902 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
903 TokenType.IS: lambda self, this: self._parse_is(this),
904 TokenType.LIKE: binary_range_parser(exp.Like),
905 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
906 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
907 TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
908 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
909 TokenType.FOR: lambda self, this: self._parse_comprehension(this),
910 }
911
912 PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
913 "ALLOWED_VALUES": lambda self: self.expression(
914 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
915 ),
916 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
917 "AUTO": lambda self: self._parse_auto_property(),
918 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
919 "BACKUP": lambda self: self.expression(
920 exp.BackupProperty, this=self._parse_var(any_token=True)
921 ),
922 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
923 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
924 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
925 "CHECKSUM": lambda self: self._parse_checksum(),
926 "CLUSTER BY": lambda self: self._parse_cluster(),
927 "CLUSTERED": lambda self: self._parse_clustered_by(),
928 "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
929 exp.CollateProperty, **kwargs
930 ),
931 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
932 "CONTAINS": lambda self: self._parse_contains_property(),
933 "COPY": lambda self: self._parse_copy_property(),
934 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
935 "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
936 "DEFINER": lambda self: self._parse_definer(),
937 "DETERMINISTIC": lambda self: self.expression(
938 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
939 ),
940 "DISTRIBUTED": lambda self: self._parse_distributed_property(),
941 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
942 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
943 "DISTKEY": lambda self: self._parse_distkey(),
944 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
945 "EMPTY": lambda self: self.expression(exp.EmptyProperty),
946 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
947 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
948 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
949 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
950 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
951 "FREESPACE": lambda self: self._parse_freespace(),
952 "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
953 "HEAP": lambda self: self.expression(exp.HeapProperty),
954 "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
955 "IMMUTABLE": lambda self: self.expression(
956 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
957 ),
958 "INHERITS": lambda self: self.expression(
959 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
960 ),
961 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
962 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
963 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
964 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
965 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
966 "LIKE": lambda self: self._parse_create_like(),
967 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
968 "LOCK": lambda self: self._parse_locking(),
969 "LOCKING": lambda self: self._parse_locking(),
970 "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
971 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
972 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
973 "MODIFIES": lambda self: self._parse_modifies_property(),
974 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
975 "NO": lambda self: self._parse_no_property(),
976 "ON": lambda self: self._parse_on_property(),
977 "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
978 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
979 "PARTITION": lambda self: self._parse_partitioned_of(),
980 "PARTITION BY": lambda self: self._parse_partitioned_by(),
981 "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
982 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
983 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
984 "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
985 "READS": lambda self: self._parse_reads_property(),
986 "REMOTE": lambda self: self._parse_remote_with_connection(),
987 "RETURNS": lambda self: self._parse_returns(),
988 "STRICT": lambda self: self.expression(exp.StrictProperty),
989 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
990 "ROW": lambda self: self._parse_row(),
991 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
992 "SAMPLE": lambda self: self.expression(
993 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
994 ),
995 "SECURE": lambda self: self.expression(exp.SecureProperty),
996 "SECURITY": lambda self: self._parse_security(),
997 "SET": lambda self: self.expression(exp.SetProperty, multi=False),
998 "SETTINGS": lambda self: self._parse_settings_property(),
999 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
1000 "SORTKEY": lambda self: self._parse_sortkey(),
1001 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
1002 "STABLE": lambda self: self.expression(
1003 exp.StabilityProperty, this=exp.Literal.string("STABLE")
1004 ),
1005 "STORED": lambda self: self._parse_stored(),
1006 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
1007 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
1008 "TEMP": lambda self: self.expression(exp.TemporaryProperty),
1009 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
1010 "TO": lambda self: self._parse_to_table(),
1011 "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
1012 "TRANSFORM": lambda self: self.expression(
1013 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
1014 ),
1015 "TTL": lambda self: self._parse_ttl(),
1016 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
1017 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
1018 "VOLATILE": lambda self: self._parse_volatile_property(),
1019 "WITH": lambda self: self._parse_with_property(),
1020 }
1021
1022 CONSTRAINT_PARSERS = {
1023 "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
1024 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
1025 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
1026 "CHARACTER SET": lambda self: self.expression(
1027 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
1028 ),
1029 "CHECK": lambda self: self.expression(
1030 exp.CheckColumnConstraint,
1031 this=self._parse_wrapped(self._parse_assignment),
1032 enforced=self._match_text_seq("ENFORCED"),
1033 ),
1034 "COLLATE": lambda self: self.expression(
1035 exp.CollateColumnConstraint,
1036 this=self._parse_identifier() or self._parse_column(),
1037 ),
1038 "COMMENT": lambda self: self.expression(
1039 exp.CommentColumnConstraint, this=self._parse_string()
1040 ),
1041 "COMPRESS": lambda self: self._parse_compress(),
1042 "CLUSTERED": lambda self: self.expression(
1043 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
1044 ),
1045 "NONCLUSTERED": lambda self: self.expression(
1046 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
1047 ),
1048 "DEFAULT": lambda self: self.expression(
1049 exp.DefaultColumnConstraint, this=self._parse_bitwise()
1050 ),
1051 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
1052 "EPHEMERAL": lambda self: self.expression(
1053 exp.EphemeralColumnConstraint, this=self._parse_bitwise()
1054 ),
1055 "EXCLUDE": lambda self: self.expression(
1056 exp.ExcludeColumnConstraint, this=self._parse_index_params()
1057 ),
1058 "FOREIGN KEY": lambda self: self._parse_foreign_key(),
1059 "FORMAT": lambda self: self.expression(
1060 exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
1061 ),
1062 "GENERATED": lambda self: self._parse_generated_as_identity(),
1063 "IDENTITY": lambda self: self._parse_auto_increment(),
1064 "INLINE": lambda self: self._parse_inline(),
1065 "LIKE": lambda self: self._parse_create_like(),
1066 "NOT": lambda self: self._parse_not_constraint(),
1067 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
1068 "ON": lambda self: (
1069 self._match(TokenType.UPDATE)
1070 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
1071 )
1072 or self.expression(exp.OnProperty, this=self._parse_id_var()),
1073 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
1074 "PERIOD": lambda self: self._parse_period_for_system_time(),
1075 "PRIMARY KEY": lambda self: self._parse_primary_key(),
1076 "REFERENCES": lambda self: self._parse_references(match=False),
1077 "TITLE": lambda self: self.expression(
1078 exp.TitleColumnConstraint, this=self._parse_var_or_string()
1079 ),
1080 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
1081 "UNIQUE": lambda self: self._parse_unique(),
1082 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
1083 "WATERMARK": lambda self: self.expression(
1084 exp.WatermarkColumnConstraint,
1085 this=self._match(TokenType.FOR) and self._parse_column(),
1086 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
1087 ),
1088 "WITH": lambda self: self.expression(
1089 exp.Properties, expressions=self._parse_wrapped_properties()
1090 ),
1091 }
1092
1093 ALTER_PARSERS = {
1094 "ADD": lambda self: self._parse_alter_table_add(),
1095 "AS": lambda self: self._parse_select(),
1096 "ALTER": lambda self: self._parse_alter_table_alter(),
1097 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
1098 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
1099 "DROP": lambda self: self._parse_alter_table_drop(),
1100 "RENAME": lambda self: self._parse_alter_table_rename(),
1101 "SET": lambda self: self._parse_alter_table_set(),
1102 "SWAP": lambda self: self.expression(
1103 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
1104 ),
1105 }
1106
1107 ALTER_ALTER_PARSERS = {
1108 "DISTKEY": lambda self: self._parse_alter_diststyle(),
1109 "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
1110 "SORTKEY": lambda self: self._parse_alter_sortkey(),
1111 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
1112 }
1113
1114 SCHEMA_UNNAMED_CONSTRAINTS = {
1115 "CHECK",
1116 "EXCLUDE",
1117 "FOREIGN KEY",
1118 "LIKE",
1119 "PERIOD",
1120 "PRIMARY KEY",
1121 "UNIQUE",
1122 "WATERMARK",
1123 }
1124
1125 NO_PAREN_FUNCTION_PARSERS = {
1126 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
1127 "CASE": lambda self: self._parse_case(),
1128 "CONNECT_BY_ROOT": lambda self: self.expression(
1129 exp.ConnectByRoot, this=self._parse_column()
1130 ),
1131 "IF": lambda self: self._parse_if(),
1132 }
1133
1134 INVALID_FUNC_NAME_TOKENS = {
1135 TokenType.IDENTIFIER,
1136 TokenType.STRING,
1137 }
1138
1139 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
1140
1141 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
1142
1143 FUNCTION_PARSERS = {
1144 "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
1145 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
1146 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
1147 "DECODE": lambda self: self._parse_decode(),
1148 "EXTRACT": lambda self: self._parse_extract(),
1149 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
1150 "GAP_FILL": lambda self: self._parse_gap_fill(),
1151 "JSON_OBJECT": lambda self: self._parse_json_object(),
1152 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
1153 "JSON_TABLE": lambda self: self._parse_json_table(),
1154 "MATCH": lambda self: self._parse_match_against(),
1155 "NORMALIZE": lambda self: self._parse_normalize(),
1156 "OPENJSON": lambda self: self._parse_open_json(),
1157 "OVERLAY": lambda self: self._parse_overlay(),
1158 "POSITION": lambda self: self._parse_position(),
1159 "PREDICT": lambda self: self._parse_predict(),
1160 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
1161 "STRING_AGG": lambda self: self._parse_string_agg(),
1162 "SUBSTRING": lambda self: self._parse_substring(),
1163 "TRIM": lambda self: self._parse_trim(),
1164 "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
1165 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
1166 "XMLELEMENT": lambda self: self.expression(
1167 exp.XMLElement,
1168 this=self._match_text_seq("NAME") and self._parse_id_var(),
1169 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
1170 ),
1171 "XMLTABLE": lambda self: self._parse_xml_table(),
1172 }
1173
1174 QUERY_MODIFIER_PARSERS = {
1175 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
1176 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
1177 TokenType.WHERE: lambda self: ("where", self._parse_where()),
1178 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
1179 TokenType.HAVING: lambda self: ("having", self._parse_having()),
1180 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
1181 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
1182 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
1183 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
1184 TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
1185 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
1186 TokenType.FOR: lambda self: ("locks", self._parse_locks()),
1187 TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
1188 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
1189 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
1190 TokenType.CLUSTER_BY: lambda self: (
1191 "cluster",
1192 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
1193 ),
1194 TokenType.DISTRIBUTE_BY: lambda self: (
1195 "distribute",
1196 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
1197 ),
1198 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
1199 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
1200 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
1201 }
1202
1203 SET_PARSERS = {
1204 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
1205 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
1206 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
1207 "TRANSACTION": lambda self: self._parse_set_transaction(),
1208 }
1209
1210 SHOW_PARSERS: t.Dict[str, t.Callable] = {}
1211
1212 TYPE_LITERAL_PARSERS = {
1213 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
1214 }
1215
1216 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}
1217
1218 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}
1219
1220 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
1221
1222 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
1223 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
1224 "ISOLATION": (
1225 ("LEVEL", "REPEATABLE", "READ"),
1226 ("LEVEL", "READ", "COMMITTED"),
1227 ("LEVEL", "READ", "UNCOMITTED"),
1228 ("LEVEL", "SERIALIZABLE"),
1229 ),
1230 "READ": ("WRITE", "ONLY"),
1231 }
1232
1233 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
1234 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
1235 )
1236 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
1237
1238 CREATE_SEQUENCE: OPTIONS_TYPE = {
1239 "SCALE": ("EXTEND", "NOEXTEND"),
1240 "SHARD": ("EXTEND", "NOEXTEND"),
1241 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
1242 **dict.fromkeys(
1243 (
1244 "SESSION",
1245 "GLOBAL",
1246 "KEEP",
1247 "NOKEEP",
1248 "ORDER",
1249 "NOORDER",
1250 "NOCACHE",
1251 "CYCLE",
1252 "NOCYCLE",
1253 "NOMINVALUE",
1254 "NOMAXVALUE",
1255 "NOSCALE",
1256 "NOSHARD",
1257 ),
1258 tuple(),
1259 ),
1260 }
1261
1262 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}
1263
1264 USABLES: OPTIONS_TYPE = dict.fromkeys(
1265 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
1266 )
1267
1268 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))
1269
1270 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
1271 "TYPE": ("EVOLUTION",),
1272 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
1273 }
1274
1275 PROCEDURE_OPTIONS: OPTIONS_TYPE = {}
1276
1277 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())
1278
1279 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
1280 "NOT": ("ENFORCED",),
1281 "MATCH": (
1282 "FULL",
1283 "PARTIAL",
1284 "SIMPLE",
1285 ),
1286 "INITIALLY": ("DEFERRED", "IMMEDIATE"),
1287 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
1288 }
1289
1290 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
1291
1292 CLONE_KEYWORDS = {"CLONE", "COPY"}
1293 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
1294 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}
1295
1296 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}
1297
1298 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
1299
1300 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}
1301
1302 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}
1303
1304 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
1305 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
1306 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
1307
1308 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}
1309
1310 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}
1311
1312 ADD_CONSTRAINT_TOKENS = {
1313 TokenType.CONSTRAINT,
1314 TokenType.FOREIGN_KEY,
1315 TokenType.INDEX,
1316 TokenType.KEY,
1317 TokenType.PRIMARY_KEY,
1318 TokenType.UNIQUE,
1319 }
1320
1321 DISTINCT_TOKENS = {TokenType.DISTINCT}
1322
1323 NULL_TOKENS = {TokenType.NULL}
1324
1325 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS
1326
1327 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
1328
1329 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}
1330
1331 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}
1332
1333 ODBC_DATETIME_LITERALS = {
1334 "d": exp.Date,
1335 "t": exp.Time,
1336 "ts": exp.Timestamp,
1337 }
1338
1339 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}
1340
1341 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}
1342
1343 # The style options for the DESCRIBE statement
1344 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}
1345
1346 # The style options for the ANALYZE statement
1347 ANALYZE_STYLES = {
1348 "BUFFER_USAGE_LIMIT",
1349 "FULL",
1350 "LOCAL",
1351 "NO_WRITE_TO_BINLOG",
1352 "SAMPLE",
1353 "SKIP_LOCKED",
1354 "VERBOSE",
1355 }
1356
1357 ANALYZE_EXPRESSION_PARSERS = {
1358 "ALL": lambda self: self._parse_analyze_columns(),
1359 "COMPUTE": lambda self: self._parse_analyze_statistics(),
1360 "DELETE": lambda self: self._parse_analyze_delete(),
1361 "DROP": lambda self: self._parse_analyze_histogram(),
1362 "ESTIMATE": lambda self: self._parse_analyze_statistics(),
1363 "LIST": lambda self: self._parse_analyze_list(),
1364 "PREDICATE": lambda self: self._parse_analyze_columns(),
1365 "UPDATE": lambda self: self._parse_analyze_histogram(),
1366 "VALIDATE": lambda self: self._parse_analyze_validate(),
1367 }
1368
1369 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}
1370
1371 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)
1372
1373 OPERATION_MODIFIERS: t.Set[str] = set()
1374
1375 STRICT_CAST = True
1376
1377 PREFIXED_PIVOT_COLUMNS = False
1378 IDENTIFY_PIVOT_STRINGS = False
1379
1380 LOG_DEFAULTS_TO_LN = False
1381
1382 # Whether ADD is present for each column added by ALTER TABLE
1383 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True
1384
1385 # Whether the table sample clause expects CSV syntax
1386 TABLESAMPLE_CSV = False
1387
1388 # The default method used for table sampling
1389 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None
1390
1391 # Whether the SET command needs a delimiter (e.g. "=") for assignments
1392 SET_REQUIRES_ASSIGNMENT_DELIMITER = True
1393
1394 # Whether the TRIM function expects the characters to trim as its first argument
1395 TRIM_PATTERN_FIRST = False
1396
1397 # Whether string aliases are supported `SELECT COUNT(*) 'count'`
1398 STRING_ALIASES = False
1399
1400 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
1401 MODIFIERS_ATTACHED_TO_SET_OP = True
1402 SET_OP_MODIFIERS = {"order", "limit", "offset"}
1403
1404 # Whether to parse IF statements that aren't followed by a left parenthesis as commands
1405 NO_PAREN_IF_COMMANDS = True
1406
1407 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
1408 JSON_ARROWS_REQUIRE_JSON_TYPE = False
1409
1410 # Whether the `:` operator is used to extract a value from a VARIANT column
1411 COLON_IS_VARIANT_EXTRACT = False
1412
1413 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
1414 # If this is True and '(' is not found, the keyword will be treated as an identifier
1415 VALUES_FOLLOWED_BY_PAREN = True
1416
1417 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
1418 SUPPORTS_IMPLICIT_UNNEST = False
1419
1420 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
1421 INTERVAL_SPANS = True
1422
1423 # Whether a PARTITION clause can follow a table reference
1424 SUPPORTS_PARTITION_SELECTION = False
1425
1426 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
1427 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True
1428
1429 # Whether the 'AS' keyword is optional in the CTE definition syntax
1430 OPTIONAL_ALIAS_TOKEN_CTE = True
1431
1432 __slots__ = (
1433 "error_level",
1434 "error_message_context",
1435 "max_errors",
1436 "dialect",
1437 "sql",
1438 "errors",
1439 "_tokens",
1440 "_index",
1441 "_curr",
1442 "_next",
1443 "_prev",
1444 "_prev_comments",
1445 )
1446
1447 # Autofilled
1448 SHOW_TRIE: t.Dict = {}
1449 SET_TRIE: t.Dict = {}
1450
1451 def __init__(
1452 self,
1453 error_level: t.Optional[ErrorLevel] = None,
1454 error_message_context: int = 100,
1455 max_errors: int = 3,
1456 dialect: DialectType = None,
1457 ):
1458 from sqlglot.dialects import Dialect
1459
1460 self.error_level = error_level or ErrorLevel.IMMEDIATE
1461 self.error_message_context = error_message_context
1462 self.max_errors = max_errors
1463 self.dialect = Dialect.get_or_raise(dialect)
1464 self.reset()
1465
1466 def reset(self):
1467 self.sql = ""
1468 self.errors = []
1469 self._tokens = []
1470 self._index = 0
1471 self._curr = None
1472 self._next = None
1473 self._prev = None
1474 self._prev_comments = None
1475
1476 def parse(
1477 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
1478 ) -> t.List[t.Optional[exp.Expression]]:
1479 """
1480 Parses a list of tokens and returns a list of syntax trees, one tree
1481 per parsed SQL statement.
1482
1483 Args:
1484 raw_tokens: The list of tokens.
1485 sql: The original SQL string, used to produce helpful debug messages.
1486
1487 Returns:
1488 The list of the produced syntax trees.
1489 """
1490 return self._parse(
1491 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
1492 )
1493
1494 def parse_into(
1495 self,
1496 expression_types: exp.IntoType,
1497 raw_tokens: t.List[Token],
1498 sql: t.Optional[str] = None,
1499 ) -> t.List[t.Optional[exp.Expression]]:
1500 """
1501 Parses a list of tokens into a given Expression type. If a collection of Expression
1502 types is given instead, this method will try to parse the token list into each one
1503 of them, stopping at the first for which the parsing succeeds.
1504
1505 Args:
1506 expression_types: The expression type(s) to try and parse the token list into.
1507 raw_tokens: The list of tokens.
1508 sql: The original SQL string, used to produce helpful debug messages.
1509
1510 Returns:
1511 The target Expression.
1512 """
1513 errors = []
1514 for expression_type in ensure_list(expression_types):
1515 parser = self.EXPRESSION_PARSERS.get(expression_type)
1516 if not parser:
1517 raise TypeError(f"No parser registered for {expression_type}")
1518
1519 try:
1520 return self._parse(parser, raw_tokens, sql)
1521 except ParseError as e:
1522 e.errors[0]["into_expression"] = expression_type
1523 errors.append(e)
1524
1525 raise ParseError(
1526 f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
1527 errors=merge_errors(errors),
1528 ) from errors[-1]
1529
1530 def _parse(
1531 self,
1532 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
1533 raw_tokens: t.List[Token],
1534 sql: t.Optional[str] = None,
1535 ) -> t.List[t.Optional[exp.Expression]]:
1536 self.reset()
1537 self.sql = sql or ""
1538
1539 total = len(raw_tokens)
1540 chunks: t.List[t.List[Token]] = [[]]
1541
1542 for i, token in enumerate(raw_tokens):
1543 if token.token_type == TokenType.SEMICOLON:
1544 if token.comments:
1545 chunks.append([token])
1546
1547 if i < total - 1:
1548 chunks.append([])
1549 else:
1550 chunks[-1].append(token)
1551
1552 expressions = []
1553
1554 for tokens in chunks:
1555 self._index = -1
1556 self._tokens = tokens
1557 self._advance()
1558
1559 expressions.append(parse_method(self))
1560
1561 if self._index < len(self._tokens):
1562 self.raise_error("Invalid expression / Unexpected token")
1563
1564 self.check_errors()
1565
1566 return expressions
1567
1568 def check_errors(self) -> None:
1569 """Logs or raises any found errors, depending on the chosen error level setting."""
1570 if self.error_level == ErrorLevel.WARN:
1571 for error in self.errors:
1572 logger.error(str(error))
1573 elif self.error_level == ErrorLevel.RAISE and self.errors:
1574 raise ParseError(
1575 concat_messages(self.errors, self.max_errors),
1576 errors=merge_errors(self.errors),
1577 )
1578
1579 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
1580 """
1581 Appends an error in the list of recorded errors or raises it, depending on the chosen
1582 error level setting.
1583 """
1584 token = token or self._curr or self._prev or Token.string("")
1585 start = token.start
1586 end = token.end + 1
1587 start_context = self.sql[max(start - self.error_message_context, 0) : start]
1588 highlight = self.sql[start:end]
1589 end_context = self.sql[end : end + self.error_message_context]
1590
1591 error = ParseError.new(
1592 f"{message}. Line {token.line}, Col: {token.col}.\n"
1593 f" {start_context}\033[4m{highlight}\033[0m{end_context}",
1594 description=message,
1595 line=token.line,
1596 col=token.col,
1597 start_context=start_context,
1598 highlight=highlight,
1599 end_context=end_context,
1600 )
1601
1602 if self.error_level == ErrorLevel.IMMEDIATE:
1603 raise error
1604
1605 self.errors.append(error)
1606
1607 def expression(
1608 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
1609 ) -> E:
1610 """
1611 Creates a new, validated Expression.
1612
1613 Args:
1614 exp_class: The expression class to instantiate.
1615 comments: An optional list of comments to attach to the expression.
1616 kwargs: The arguments to set for the expression along with their respective values.
1617
1618 Returns:
1619 The target expression.
1620 """
1621 instance = exp_class(**kwargs)
1622 instance.add_comments(comments) if comments else self._add_comments(instance)
1623 return self.validate_expression(instance)
1624
1625 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
1626 if expression and self._prev_comments:
1627 expression.add_comments(self._prev_comments)
1628 self._prev_comments = None
1629
1630 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
1631 """
1632 Validates an Expression, making sure that all its mandatory arguments are set.
1633
1634 Args:
1635 expression: The expression to validate.
1636 args: An optional list of items that was used to instantiate the expression, if it's a Func.
1637
1638 Returns:
1639 The validated expression.
1640 """
1641 if self.error_level != ErrorLevel.IGNORE:
1642 for error_message in expression.error_messages(args):
1643 self.raise_error(error_message)
1644
1645 return expression
1646
1647 def _find_sql(self, start: Token, end: Token) -> str:
1648 return self.sql[start.start : end.end + 1]
1649
1650 def _is_connected(self) -> bool:
1651 return self._prev and self._curr and self._prev.end + 1 == self._curr.start
1652
1653 def _advance(self, times: int = 1) -> None:
1654 self._index += times
1655 self._curr = seq_get(self._tokens, self._index)
1656 self._next = seq_get(self._tokens, self._index + 1)
1657
1658 if self._index > 0:
1659 self._prev = self._tokens[self._index - 1]
1660 self._prev_comments = self._prev.comments
1661 else:
1662 self._prev = None
1663 self._prev_comments = None
1664
1665 def _retreat(self, index: int) -> None:
1666 if index != self._index:
1667 self._advance(index - self._index)
1668
1669 def _warn_unsupported(self) -> None:
1670 if len(self._tokens) <= 1:
1671 return
1672
1673 # We use _find_sql because self.sql may comprise multiple chunks, and we're only
1674 # interested in emitting a warning for the one being currently processed.
1675 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]
1676
1677 logger.warning(
1678 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1679 )
1680
1681 def _parse_command(self) -> exp.Command:
1682 self._warn_unsupported()
1683 return self.expression(
1684 exp.Command,
1685 comments=self._prev_comments,
1686 this=self._prev.text.upper(),
1687 expression=self._parse_string(),
1688 )
1689
1690 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
1691 """
1692 Attemps to backtrack if a parse function that contains a try/catch internally raises an error.
1693 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to
1694 solve this by setting & resetting the parser state accordingly
1695 """
1696 index = self._index
1697 error_level = self.error_level
1698
1699 self.error_level = ErrorLevel.IMMEDIATE
1700 try:
1701 this = parse_method()
1702 except ParseError:
1703 this = None
1704 finally:
1705 if not this or retreat:
1706 self._retreat(index)
1707 self.error_level = error_level
1708
1709 return this
1710
1711 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1712 start = self._prev
1713 exists = self._parse_exists() if allow_exists else None
1714
1715 self._match(TokenType.ON)
1716
1717 materialized = self._match_text_seq("MATERIALIZED")
1718 kind = self._match_set(self.CREATABLES) and self._prev
1719 if not kind:
1720 return self._parse_as_command(start)
1721
1722 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1723 this = self._parse_user_defined_function(kind=kind.token_type)
1724 elif kind.token_type == TokenType.TABLE:
1725 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1726 elif kind.token_type == TokenType.COLUMN:
1727 this = self._parse_column()
1728 else:
1729 this = self._parse_id_var()
1730
1731 self._match(TokenType.IS)
1732
1733 return self.expression(
1734 exp.Comment,
1735 this=this,
1736 kind=kind.text,
1737 expression=self._parse_string(),
1738 exists=exists,
1739 materialized=materialized,
1740 )
1741
1742 def _parse_to_table(
1743 self,
1744 ) -> exp.ToTableProperty:
1745 table = self._parse_table_parts(schema=True)
1746 return self.expression(exp.ToTableProperty, this=table)
1747
1748 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1749 def _parse_ttl(self) -> exp.Expression:
1750 def _parse_ttl_action() -> t.Optional[exp.Expression]:
1751 this = self._parse_bitwise()
1752
1753 if self._match_text_seq("DELETE"):
1754 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1755 if self._match_text_seq("RECOMPRESS"):
1756 return self.expression(
1757 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1758 )
1759 if self._match_text_seq("TO", "DISK"):
1760 return self.expression(
1761 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1762 )
1763 if self._match_text_seq("TO", "VOLUME"):
1764 return self.expression(
1765 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1766 )
1767
1768 return this
1769
1770 expressions = self._parse_csv(_parse_ttl_action)
1771 where = self._parse_where()
1772 group = self._parse_group()
1773
1774 aggregates = None
1775 if group and self._match(TokenType.SET):
1776 aggregates = self._parse_csv(self._parse_set_item)
1777
1778 return self.expression(
1779 exp.MergeTreeTTL,
1780 expressions=expressions,
1781 where=where,
1782 group=group,
1783 aggregates=aggregates,
1784 )
1785
1786 def _parse_statement(self) -> t.Optional[exp.Expression]:
1787 if self._curr is None:
1788 return None
1789
1790 if self._match_set(self.STATEMENT_PARSERS):
1791 comments = self._prev_comments
1792 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1793 stmt.add_comments(comments, prepend=True)
1794 return stmt
1795
1796 if self._match_set(self.dialect.tokenizer.COMMANDS):
1797 return self._parse_command()
1798
1799 expression = self._parse_expression()
1800 expression = self._parse_set_operations(expression) if expression else self._parse_select()
1801 return self._parse_query_modifiers(expression)
1802
1803 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
1804 start = self._prev
1805 temporary = self._match(TokenType.TEMPORARY)
1806 materialized = self._match_text_seq("MATERIALIZED")
1807
1808 kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
1809 if not kind:
1810 return self._parse_as_command(start)
1811
1812 concurrently = self._match_text_seq("CONCURRENTLY")
1813 if_exists = exists or self._parse_exists()
1814
1815 if kind == "COLUMN":
1816 this = self._parse_column()
1817 else:
1818 this = self._parse_table_parts(
1819 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
1820 )
1821
1822 cluster = self._parse_on_property() if self._match(TokenType.ON) else None
1823
1824 if self._match(TokenType.L_PAREN, advance=False):
1825 expressions = self._parse_wrapped_csv(self._parse_types)
1826 else:
1827 expressions = None
1828
1829 return self.expression(
1830 exp.Drop,
1831 exists=if_exists,
1832 this=this,
1833 expressions=expressions,
1834 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
1835 temporary=temporary,
1836 materialized=materialized,
1837 cascade=self._match_text_seq("CASCADE"),
1838 constraints=self._match_text_seq("CONSTRAINTS"),
1839 purge=self._match_text_seq("PURGE"),
1840 cluster=cluster,
1841 concurrently=concurrently,
1842 )
1843
1844 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1845 return (
1846 self._match_text_seq("IF")
1847 and (not not_ or self._match(TokenType.NOT))
1848 and self._match(TokenType.EXISTS)
1849 )
1850
1851 def _parse_create(self) -> exp.Create | exp.Command:
1852 # Note: this can't be None because we've matched a statement parser
1853 start = self._prev
1854
1855 replace = (
1856 start.token_type == TokenType.REPLACE
1857 or self._match_pair(TokenType.OR, TokenType.REPLACE)
1858 or self._match_pair(TokenType.OR, TokenType.ALTER)
1859 )
1860 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)
1861
1862 unique = self._match(TokenType.UNIQUE)
1863
1864 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
1865 clustered = True
1866 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
1867 "COLUMNSTORE"
1868 ):
1869 clustered = False
1870 else:
1871 clustered = None
1872
1873 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1874 self._advance()
1875
1876 properties = None
1877 create_token = self._match_set(self.CREATABLES) and self._prev
1878
1879 if not create_token:
1880 # exp.Properties.Location.POST_CREATE
1881 properties = self._parse_properties()
1882 create_token = self._match_set(self.CREATABLES) and self._prev
1883
1884 if not properties or not create_token:
1885 return self._parse_as_command(start)
1886
1887 concurrently = self._match_text_seq("CONCURRENTLY")
1888 exists = self._parse_exists(not_=True)
1889 this = None
1890 expression: t.Optional[exp.Expression] = None
1891 indexes = None
1892 no_schema_binding = None
1893 begin = None
1894 end = None
1895 clone = None
1896
1897 def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
1898 nonlocal properties
1899 if properties and temp_props:
1900 properties.expressions.extend(temp_props.expressions)
1901 elif temp_props:
1902 properties = temp_props
1903
1904 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1905 this = self._parse_user_defined_function(kind=create_token.token_type)
1906
1907 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
1908 extend_props(self._parse_properties())
1909
1910 expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
1911 extend_props(self._parse_properties())
1912
1913 if not expression:
1914 if self._match(TokenType.COMMAND):
1915 expression = self._parse_as_command(self._prev)
1916 else:
1917 begin = self._match(TokenType.BEGIN)
1918 return_ = self._match_text_seq("RETURN")
1919
1920 if self._match(TokenType.STRING, advance=False):
1921 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
1922 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
1923 expression = self._parse_string()
1924 extend_props(self._parse_properties())
1925 else:
1926 expression = self._parse_user_defined_function_expression()
1927
1928 end = self._match_text_seq("END")
1929
1930 if return_:
1931 expression = self.expression(exp.Return, this=expression)
1932 elif create_token.token_type == TokenType.INDEX:
1933 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
1934 if not self._match(TokenType.ON):
1935 index = self._parse_id_var()
1936 anonymous = False
1937 else:
1938 index = None
1939 anonymous = True
1940
1941 this = self._parse_index(index=index, anonymous=anonymous)
1942 elif create_token.token_type in self.DB_CREATABLES:
1943 table_parts = self._parse_table_parts(
1944 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
1945 )
1946
1947 # exp.Properties.Location.POST_NAME
1948 self._match(TokenType.COMMA)
1949 extend_props(self._parse_properties(before=True))
1950
1951 this = self._parse_schema(this=table_parts)
1952
1953 # exp.Properties.Location.POST_SCHEMA and POST_WITH
1954 extend_props(self._parse_properties())
1955
1956 self._match(TokenType.ALIAS)
1957 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
1958 # exp.Properties.Location.POST_ALIAS
1959 extend_props(self._parse_properties())
1960
1961 if create_token.token_type == TokenType.SEQUENCE:
1962 expression = self._parse_types()
1963 extend_props(self._parse_properties())
1964 else:
1965 expression = self._parse_ddl_select()
1966
1967 if create_token.token_type == TokenType.TABLE:
1968 # exp.Properties.Location.POST_EXPRESSION
1969 extend_props(self._parse_properties())
1970
1971 indexes = []
1972 while True:
1973 index = self._parse_index()
1974
1975 # exp.Properties.Location.POST_INDEX
1976 extend_props(self._parse_properties())
1977 if not index:
1978 break
1979 else:
1980 self._match(TokenType.COMMA)
1981 indexes.append(index)
1982 elif create_token.token_type == TokenType.VIEW:
1983 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1984 no_schema_binding = True
1985 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
1986 extend_props(self._parse_properties())
1987
1988 shallow = self._match_text_seq("SHALLOW")
1989
1990 if self._match_texts(self.CLONE_KEYWORDS):
1991 copy = self._prev.text.lower() == "copy"
1992 clone = self.expression(
1993 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
1994 )
1995
1996 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
1997 return self._parse_as_command(start)
1998
1999 create_kind_text = create_token.text.upper()
2000 return self.expression(
2001 exp.Create,
2002 this=this,
2003 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
2004 replace=replace,
2005 refresh=refresh,
2006 unique=unique,
2007 expression=expression,
2008 exists=exists,
2009 properties=properties,
2010 indexes=indexes,
2011 no_schema_binding=no_schema_binding,
2012 begin=begin,
2013 end=end,
2014 clone=clone,
2015 concurrently=concurrently,
2016 clustered=clustered,
2017 )
2018
2019 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
2020 seq = exp.SequenceProperties()
2021
2022 options = []
2023 index = self._index
2024
2025 while self._curr:
2026 self._match(TokenType.COMMA)
2027 if self._match_text_seq("INCREMENT"):
2028 self._match_text_seq("BY")
2029 self._match_text_seq("=")
2030 seq.set("increment", self._parse_term())
2031 elif self._match_text_seq("MINVALUE"):
2032 seq.set("minvalue", self._parse_term())
2033 elif self._match_text_seq("MAXVALUE"):
2034 seq.set("maxvalue", self._parse_term())
2035 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
2036 self._match_text_seq("=")
2037 seq.set("start", self._parse_term())
2038 elif self._match_text_seq("CACHE"):
2039 # T-SQL allows empty CACHE which is initialized dynamically
2040 seq.set("cache", self._parse_number() or True)
2041 elif self._match_text_seq("OWNED", "BY"):
2042 # "OWNED BY NONE" is the default
2043 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
2044 else:
2045 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
2046 if opt:
2047 options.append(opt)
2048 else:
2049 break
2050
2051 seq.set("options", options if options else None)
2052 return None if self._index == index else seq
2053
2054 def _parse_property_before(self) -> t.Optional[exp.Expression]:
2055 # only used for teradata currently
2056 self._match(TokenType.COMMA)
2057
2058 kwargs = {
2059 "no": self._match_text_seq("NO"),
2060 "dual": self._match_text_seq("DUAL"),
2061 "before": self._match_text_seq("BEFORE"),
2062 "default": self._match_text_seq("DEFAULT"),
2063 "local": (self._match_text_seq("LOCAL") and "LOCAL")
2064 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
2065 "after": self._match_text_seq("AFTER"),
2066 "minimum": self._match_texts(("MIN", "MINIMUM")),
2067 "maximum": self._match_texts(("MAX", "MAXIMUM")),
2068 }
2069
2070 if self._match_texts(self.PROPERTY_PARSERS):
2071 parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
2072 try:
2073 return parser(self, **{k: v for k, v in kwargs.items() if v})
2074 except TypeError:
2075 self.raise_error(f"Cannot parse property '{self._prev.text}'")
2076
2077 return None
2078
2079 def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
2080 return self._parse_wrapped_csv(self._parse_property)
2081
2082 def _parse_property(self) -> t.Optional[exp.Expression]:
2083 if self._match_texts(self.PROPERTY_PARSERS):
2084 return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
2085
2086 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
2087 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)
2088
2089 if self._match_text_seq("COMPOUND", "SORTKEY"):
2090 return self._parse_sortkey(compound=True)
2091
2092 if self._match_text_seq("SQL", "SECURITY"):
2093 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
2094
2095 index = self._index
2096 key = self._parse_column()
2097
2098 if not self._match(TokenType.EQ):
2099 self._retreat(index)
2100 return self._parse_sequence_properties()
2101
2102 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
2103 if isinstance(key, exp.Column):
2104 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)
2105
2106 value = self._parse_bitwise() or self._parse_var(any_token=True)
2107
2108 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
2109 if isinstance(value, exp.Column):
2110 value = exp.var(value.name)
2111
2112 return self.expression(exp.Property, this=key, value=value)
2113
2114 def _parse_stored(self) -> exp.FileFormatProperty:
2115 self._match(TokenType.ALIAS)
2116
2117 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
2118 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
2119
2120 return self.expression(
2121 exp.FileFormatProperty,
2122 this=(
2123 self.expression(
2124 exp.InputOutputFormat, input_format=input_format, output_format=output_format
2125 )
2126 if input_format or output_format
2127 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
2128 ),
2129 )
2130
2131 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
2132 field = self._parse_field()
2133 if isinstance(field, exp.Identifier) and not field.quoted:
2134 field = exp.var(field)
2135
2136 return field
2137
2138 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
2139 self._match(TokenType.EQ)
2140 self._match(TokenType.ALIAS)
2141
2142 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)
2143
2144 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
2145 properties = []
2146 while True:
2147 if before:
2148 prop = self._parse_property_before()
2149 else:
2150 prop = self._parse_property()
2151 if not prop:
2152 break
2153 for p in ensure_list(prop):
2154 properties.append(p)
2155
2156 if properties:
2157 return self.expression(exp.Properties, expressions=properties)
2158
2159 return None
2160
2161 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
2162 return self.expression(
2163 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
2164 )
2165
2166 def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
2167 if self._match_texts(("DEFINER", "INVOKER")):
2168 security_specifier = self._prev.text.upper()
2169 return self.expression(exp.SecurityProperty, this=security_specifier)
2170 return None
2171
2172 def _parse_settings_property(self) -> exp.SettingsProperty:
2173 return self.expression(
2174 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
2175 )
2176
2177 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
2178 if self._index >= 2:
2179 pre_volatile_token = self._tokens[self._index - 2]
2180 else:
2181 pre_volatile_token = None
2182
2183 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
2184 return exp.VolatileProperty()
2185
2186 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
2187
2188 def _parse_retention_period(self) -> exp.Var:
2189 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
2190 number = self._parse_number()
2191 number_str = f"{number} " if number else ""
2192 unit = self._parse_var(any_token=True)
2193 return exp.var(f"{number_str}{unit}")
2194
2195 def _parse_system_versioning_property(
2196 self, with_: bool = False
2197 ) -> exp.WithSystemVersioningProperty:
2198 self._match(TokenType.EQ)
2199 prop = self.expression(
2200 exp.WithSystemVersioningProperty,
2201 **{ # type: ignore
2202 "on": True,
2203 "with": with_,
2204 },
2205 )
2206
2207 if self._match_text_seq("OFF"):
2208 prop.set("on", False)
2209 return prop
2210
2211 self._match(TokenType.ON)
2212 if self._match(TokenType.L_PAREN):
2213 while self._curr and not self._match(TokenType.R_PAREN):
2214 if self._match_text_seq("HISTORY_TABLE", "="):
2215 prop.set("this", self._parse_table_parts())
2216 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
2217 prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
2218 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
2219 prop.set("retention_period", self._parse_retention_period())
2220
2221 self._match(TokenType.COMMA)
2222
2223 return prop
2224
2225 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
2226 self._match(TokenType.EQ)
2227 on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
2228 prop = self.expression(exp.DataDeletionProperty, on=on)
2229
2230 if self._match(TokenType.L_PAREN):
2231 while self._curr and not self._match(TokenType.R_PAREN):
2232 if self._match_text_seq("FILTER_COLUMN", "="):
2233 prop.set("filter_column", self._parse_column())
2234 elif self._match_text_seq("RETENTION_PERIOD", "="):
2235 prop.set("retention_period", self._parse_retention_period())
2236
2237 self._match(TokenType.COMMA)
2238
2239 return prop
2240
2241 def _parse_distributed_property(self) -> exp.DistributedByProperty:
2242 kind = "HASH"
2243 expressions: t.Optional[t.List[exp.Expression]] = None
2244 if self._match_text_seq("BY", "HASH"):
2245 expressions = self._parse_wrapped_csv(self._parse_id_var)
2246 elif self._match_text_seq("BY", "RANDOM"):
2247 kind = "RANDOM"
2248
2249 # If the BUCKETS keyword is not present, the number of buckets is AUTO
2250 buckets: t.Optional[exp.Expression] = None
2251 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
2252 buckets = self._parse_number()
2253
2254 return self.expression(
2255 exp.DistributedByProperty,
2256 expressions=expressions,
2257 kind=kind,
2258 buckets=buckets,
2259 order=self._parse_order(),
2260 )
2261
2262 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
2263 self._match_text_seq("KEY")
2264 expressions = self._parse_wrapped_id_vars()
2265 return self.expression(expr_type, expressions=expressions)
2266
2267 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
2268 if self._match_text_seq("(", "SYSTEM_VERSIONING"):
2269 prop = self._parse_system_versioning_property(with_=True)
2270 self._match_r_paren()
2271 return prop
2272
2273 if self._match(TokenType.L_PAREN, advance=False):
2274 return self._parse_wrapped_properties()
2275
2276 if self._match_text_seq("JOURNAL"):
2277 return self._parse_withjournaltable()
2278
2279 if self._match_texts(self.VIEW_ATTRIBUTES):
2280 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())
2281
2282 if self._match_text_seq("DATA"):
2283 return self._parse_withdata(no=False)
2284 elif self._match_text_seq("NO", "DATA"):
2285 return self._parse_withdata(no=True)
2286
2287 if self._match(TokenType.SERDE_PROPERTIES, advance=False):
2288 return self._parse_serde_properties(with_=True)
2289
2290 if self._match(TokenType.SCHEMA):
2291 return self.expression(
2292 exp.WithSchemaBindingProperty,
2293 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
2294 )
2295
2296 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
2297 return self.expression(
2298 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
2299 )
2300
2301 if not self._next:
2302 return None
2303
2304 return self._parse_withisolatedloading()
2305
2306 def _parse_procedure_option(self) -> exp.Expression | None:
2307 if self._match_text_seq("EXECUTE", "AS"):
2308 return self.expression(
2309 exp.ExecuteAsProperty,
2310 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
2311 or self._parse_string(),
2312 )
2313
2314 return self._parse_var_from_options(self.PROCEDURE_OPTIONS)
2315
2316 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
2317 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
2318 self._match(TokenType.EQ)
2319
2320 user = self._parse_id_var()
2321 self._match(TokenType.PARAMETER)
2322 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
2323
2324 if not user or not host:
2325 return None
2326
2327 return exp.DefinerProperty(this=f"{user}@{host}")
2328
2329 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
2330 self._match(TokenType.TABLE)
2331 self._match(TokenType.EQ)
2332 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
2333
2334 def _parse_log(self, no: bool = False) -> exp.LogProperty:
2335 return self.expression(exp.LogProperty, no=no)
2336
2337 def _parse_journal(self, **kwargs) -> exp.JournalProperty:
2338 return self.expression(exp.JournalProperty, **kwargs)
2339
2340 def _parse_checksum(self) -> exp.ChecksumProperty:
2341 self._match(TokenType.EQ)
2342
2343 on = None
2344 if self._match(TokenType.ON):
2345 on = True
2346 elif self._match_text_seq("OFF"):
2347 on = False
2348
2349 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
2350
2351 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
2352 return self.expression(
2353 exp.Cluster,
2354 expressions=(
2355 self._parse_wrapped_csv(self._parse_ordered)
2356 if wrapped
2357 else self._parse_csv(self._parse_ordered)
2358 ),
2359 )
2360
2361 def _parse_clustered_by(self) -> exp.ClusteredByProperty:
2362 self._match_text_seq("BY")
2363
2364 self._match_l_paren()
2365 expressions = self._parse_csv(self._parse_column)
2366 self._match_r_paren()
2367
2368 if self._match_text_seq("SORTED", "BY"):
2369 self._match_l_paren()
2370 sorted_by = self._parse_csv(self._parse_ordered)
2371 self._match_r_paren()
2372 else:
2373 sorted_by = None
2374
2375 self._match(TokenType.INTO)
2376 buckets = self._parse_number()
2377 self._match_text_seq("BUCKETS")
2378
2379 return self.expression(
2380 exp.ClusteredByProperty,
2381 expressions=expressions,
2382 sorted_by=sorted_by,
2383 buckets=buckets,
2384 )
2385
2386 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
2387 if not self._match_text_seq("GRANTS"):
2388 self._retreat(self._index - 1)
2389 return None
2390
2391 return self.expression(exp.CopyGrantsProperty)
2392
2393 def _parse_freespace(self) -> exp.FreespaceProperty:
2394 self._match(TokenType.EQ)
2395 return self.expression(
2396 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
2397 )
2398
2399 def _parse_mergeblockratio(
2400 self, no: bool = False, default: bool = False
2401 ) -> exp.MergeBlockRatioProperty:
2402 if self._match(TokenType.EQ):
2403 return self.expression(
2404 exp.MergeBlockRatioProperty,
2405 this=self._parse_number(),
2406 percent=self._match(TokenType.PERCENT),
2407 )
2408
2409 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
2410
2411 def _parse_datablocksize(
2412 self,
2413 default: t.Optional[bool] = None,
2414 minimum: t.Optional[bool] = None,
2415 maximum: t.Optional[bool] = None,
2416 ) -> exp.DataBlocksizeProperty:
2417 self._match(TokenType.EQ)
2418 size = self._parse_number()
2419
2420 units = None
2421 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
2422 units = self._prev.text
2423
2424 return self.expression(
2425 exp.DataBlocksizeProperty,
2426 size=size,
2427 units=units,
2428 default=default,
2429 minimum=minimum,
2430 maximum=maximum,
2431 )
2432
2433 def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
2434 self._match(TokenType.EQ)
2435 always = self._match_text_seq("ALWAYS")
2436 manual = self._match_text_seq("MANUAL")
2437 never = self._match_text_seq("NEVER")
2438 default = self._match_text_seq("DEFAULT")
2439
2440 autotemp = None
2441 if self._match_text_seq("AUTOTEMP"):
2442 autotemp = self._parse_schema()
2443
2444 return self.expression(
2445 exp.BlockCompressionProperty,
2446 always=always,
2447 manual=manual,
2448 never=never,
2449 default=default,
2450 autotemp=autotemp,
2451 )
2452
2453 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
2454 index = self._index
2455 no = self._match_text_seq("NO")
2456 concurrent = self._match_text_seq("CONCURRENT")
2457
2458 if not self._match_text_seq("ISOLATED", "LOADING"):
2459 self._retreat(index)
2460 return None
2461
2462 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
2463 return self.expression(
2464 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
2465 )
2466
2467 def _parse_locking(self) -> exp.LockingProperty:
2468 if self._match(TokenType.TABLE):
2469 kind = "TABLE"
2470 elif self._match(TokenType.VIEW):
2471 kind = "VIEW"
2472 elif self._match(TokenType.ROW):
2473 kind = "ROW"
2474 elif self._match_text_seq("DATABASE"):
2475 kind = "DATABASE"
2476 else:
2477 kind = None
2478
2479 if kind in ("DATABASE", "TABLE", "VIEW"):
2480 this = self._parse_table_parts()
2481 else:
2482 this = None
2483
2484 if self._match(TokenType.FOR):
2485 for_or_in = "FOR"
2486 elif self._match(TokenType.IN):
2487 for_or_in = "IN"
2488 else:
2489 for_or_in = None
2490
2491 if self._match_text_seq("ACCESS"):
2492 lock_type = "ACCESS"
2493 elif self._match_texts(("EXCL", "EXCLUSIVE")):
2494 lock_type = "EXCLUSIVE"
2495 elif self._match_text_seq("SHARE"):
2496 lock_type = "SHARE"
2497 elif self._match_text_seq("READ"):
2498 lock_type = "READ"
2499 elif self._match_text_seq("WRITE"):
2500 lock_type = "WRITE"
2501 elif self._match_text_seq("CHECKSUM"):
2502 lock_type = "CHECKSUM"
2503 else:
2504 lock_type = None
2505
2506 override = self._match_text_seq("OVERRIDE")
2507
2508 return self.expression(
2509 exp.LockingProperty,
2510 this=this,
2511 kind=kind,
2512 for_or_in=for_or_in,
2513 lock_type=lock_type,
2514 override=override,
2515 )
2516
2517 def _parse_partition_by(self) -> t.List[exp.Expression]:
2518 if self._match(TokenType.PARTITION_BY):
2519 return self._parse_csv(self._parse_assignment)
2520 return []
2521
2522 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
2523 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
2524 if self._match_text_seq("MINVALUE"):
2525 return exp.var("MINVALUE")
2526 if self._match_text_seq("MAXVALUE"):
2527 return exp.var("MAXVALUE")
2528 return self._parse_bitwise()
2529
2530 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
2531 expression = None
2532 from_expressions = None
2533 to_expressions = None
2534
2535 if self._match(TokenType.IN):
2536 this = self._parse_wrapped_csv(self._parse_bitwise)
2537 elif self._match(TokenType.FROM):
2538 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
2539 self._match_text_seq("TO")
2540 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
2541 elif self._match_text_seq("WITH", "(", "MODULUS"):
2542 this = self._parse_number()
2543 self._match_text_seq(",", "REMAINDER")
2544 expression = self._parse_number()
2545 self._match_r_paren()
2546 else:
2547 self.raise_error("Failed to parse partition bound spec.")
2548
2549 return self.expression(
2550 exp.PartitionBoundSpec,
2551 this=this,
2552 expression=expression,
2553 from_expressions=from_expressions,
2554 to_expressions=to_expressions,
2555 )
2556
2557 # https://www.postgresql.org/docs/current/sql-createtable.html
2558 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
2559 if not self._match_text_seq("OF"):
2560 self._retreat(self._index - 1)
2561 return None
2562
2563 this = self._parse_table(schema=True)
2564
2565 if self._match(TokenType.DEFAULT):
2566 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
2567 elif self._match_text_seq("FOR", "VALUES"):
2568 expression = self._parse_partition_bound_spec()
2569 else:
2570 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")
2571
2572 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
2573
2574 def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
2575 self._match(TokenType.EQ)
2576 return self.expression(
2577 exp.PartitionedByProperty,
2578 this=self._parse_schema() or self._parse_bracket(self._parse_field()),
2579 )
2580
2581 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
2582 if self._match_text_seq("AND", "STATISTICS"):
2583 statistics = True
2584 elif self._match_text_seq("AND", "NO", "STATISTICS"):
2585 statistics = False
2586 else:
2587 statistics = None
2588
2589 return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
2590
2591 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
2592 if self._match_text_seq("SQL"):
2593 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
2594 return None
2595
2596 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
2597 if self._match_text_seq("SQL", "DATA"):
2598 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
2599 return None
2600
2601 def _parse_no_property(self) -> t.Optional[exp.Expression]:
2602 if self._match_text_seq("PRIMARY", "INDEX"):
2603 return exp.NoPrimaryIndexProperty()
2604 if self._match_text_seq("SQL"):
2605 return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
2606 return None
2607
2608 def _parse_on_property(self) -> t.Optional[exp.Expression]:
2609 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
2610 return exp.OnCommitProperty()
2611 if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
2612 return exp.OnCommitProperty(delete=True)
2613 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
2614
2615 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
2616 if self._match_text_seq("SQL", "DATA"):
2617 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
2618 return None
2619
2620 def _parse_distkey(self) -> exp.DistKeyProperty:
2621 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
2622
2623 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
2624 table = self._parse_table(schema=True)
2625
2626 options = []
2627 while self._match_texts(("INCLUDING", "EXCLUDING")):
2628 this = self._prev.text.upper()
2629
2630 id_var = self._parse_id_var()
2631 if not id_var:
2632 return None
2633
2634 options.append(
2635 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
2636 )
2637
2638 return self.expression(exp.LikeProperty, this=table, expressions=options)
2639
2640 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
2641 return self.expression(
2642 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
2643 )
2644
2645 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
2646 self._match(TokenType.EQ)
2647 return self.expression(
2648 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
2649 )
2650
2651 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
2652 self._match_text_seq("WITH", "CONNECTION")
2653 return self.expression(
2654 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
2655 )
2656
2657 def _parse_returns(self) -> exp.ReturnsProperty:
2658 value: t.Optional[exp.Expression]
2659 null = None
2660 is_table = self._match(TokenType.TABLE)
2661
2662 if is_table:
2663 if self._match(TokenType.LT):
2664 value = self.expression(
2665 exp.Schema,
2666 this="TABLE",
2667 expressions=self._parse_csv(self._parse_struct_types),
2668 )
2669 if not self._match(TokenType.GT):
2670 self.raise_error("Expecting >")
2671 else:
2672 value = self._parse_schema(exp.var("TABLE"))
2673 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
2674 null = True
2675 value = None
2676 else:
2677 value = self._parse_types()
2678
2679 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
2680
2681 def _parse_describe(self) -> exp.Describe:
2682 kind = self._match_set(self.CREATABLES) and self._prev.text
2683 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
2684 if self._match(TokenType.DOT):
2685 style = None
2686 self._retreat(self._index - 2)
2687
2688 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None
2689
2690 if self._match_set(self.STATEMENT_PARSERS, advance=False):
2691 this = self._parse_statement()
2692 else:
2693 this = self._parse_table(schema=True)
2694
2695 properties = self._parse_properties()
2696 expressions = properties.expressions if properties else None
2697 partition = self._parse_partition()
2698 return self.expression(
2699 exp.Describe,
2700 this=this,
2701 style=style,
2702 kind=kind,
2703 expressions=expressions,
2704 partition=partition,
2705 format=format,
2706 )
2707
2708 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
2709 kind = self._prev.text.upper()
2710 expressions = []
2711
2712 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
2713 if self._match(TokenType.WHEN):
2714 expression = self._parse_disjunction()
2715 self._match(TokenType.THEN)
2716 else:
2717 expression = None
2718
2719 else_ = self._match(TokenType.ELSE)
2720
2721 if not self._match(TokenType.INTO):
2722 return None
2723
2724 return self.expression(
2725 exp.ConditionalInsert,
2726 this=self.expression(
2727 exp.Insert,
2728 this=self._parse_table(schema=True),
2729 expression=self._parse_derived_table_values(),
2730 ),
2731 expression=expression,
2732 else_=else_,
2733 )
2734
2735 expression = parse_conditional_insert()
2736 while expression is not None:
2737 expressions.append(expression)
2738 expression = parse_conditional_insert()
2739
2740 return self.expression(
2741 exp.MultitableInserts,
2742 kind=kind,
2743 comments=comments,
2744 expressions=expressions,
2745 source=self._parse_table(),
2746 )
2747
2748 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
2749 comments = []
2750 hint = self._parse_hint()
2751 overwrite = self._match(TokenType.OVERWRITE)
2752 ignore = self._match(TokenType.IGNORE)
2753 local = self._match_text_seq("LOCAL")
2754 alternative = None
2755 is_function = None
2756
2757 if self._match_text_seq("DIRECTORY"):
2758 this: t.Optional[exp.Expression] = self.expression(
2759 exp.Directory,
2760 this=self._parse_var_or_string(),
2761 local=local,
2762 row_format=self._parse_row_format(match_row=True),
2763 )
2764 else:
2765 if self._match_set((TokenType.FIRST, TokenType.ALL)):
2766 comments += ensure_list(self._prev_comments)
2767 return self._parse_multitable_inserts(comments)
2768
2769 if self._match(TokenType.OR):
2770 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
2771
2772 self._match(TokenType.INTO)
2773 comments += ensure_list(self._prev_comments)
2774 self._match(TokenType.TABLE)
2775 is_function = self._match(TokenType.FUNCTION)
2776
2777 this = (
2778 self._parse_table(schema=True, parse_partition=True)
2779 if not is_function
2780 else self._parse_function()
2781 )
2782 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
2783 this.set("alias", self._parse_table_alias())
2784
2785 returning = self._parse_returning()
2786
2787 return self.expression(
2788 exp.Insert,
2789 comments=comments,
2790 hint=hint,
2791 is_function=is_function,
2792 this=this,
2793 stored=self._match_text_seq("STORED") and self._parse_stored(),
2794 by_name=self._match_text_seq("BY", "NAME"),
2795 exists=self._parse_exists(),
2796 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
2797 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
2798 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
2799 expression=self._parse_derived_table_values() or self._parse_ddl_select(),
2800 conflict=self._parse_on_conflict(),
2801 returning=returning or self._parse_returning(),
2802 overwrite=overwrite,
2803 alternative=alternative,
2804 ignore=ignore,
2805 source=self._match(TokenType.TABLE) and self._parse_table(),
2806 )
2807
2808 def _parse_kill(self) -> exp.Kill:
2809 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None
2810
2811 return self.expression(
2812 exp.Kill,
2813 this=self._parse_primary(),
2814 kind=kind,
2815 )
2816
2817 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
2818 conflict = self._match_text_seq("ON", "CONFLICT")
2819 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
2820
2821 if not conflict and not duplicate:
2822 return None
2823
2824 conflict_keys = None
2825 constraint = None
2826
2827 if conflict:
2828 if self._match_text_seq("ON", "CONSTRAINT"):
2829 constraint = self._parse_id_var()
2830 elif self._match(TokenType.L_PAREN):
2831 conflict_keys = self._parse_csv(self._parse_id_var)
2832 self._match_r_paren()
2833
2834 action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
2835 if self._prev.token_type == TokenType.UPDATE:
2836 self._match(TokenType.SET)
2837 expressions = self._parse_csv(self._parse_equality)
2838 else:
2839 expressions = None
2840
2841 return self.expression(
2842 exp.OnConflict,
2843 duplicate=duplicate,
2844 expressions=expressions,
2845 action=action,
2846 conflict_keys=conflict_keys,
2847 constraint=constraint,
2848 where=self._parse_where(),
2849 )
2850
2851 def _parse_returning(self) -> t.Optional[exp.Returning]:
2852 if not self._match(TokenType.RETURNING):
2853 return None
2854 return self.expression(
2855 exp.Returning,
2856 expressions=self._parse_csv(self._parse_expression),
2857 into=self._match(TokenType.INTO) and self._parse_table_part(),
2858 )
2859
2860 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
2861 if not self._match(TokenType.FORMAT):
2862 return None
2863 return self._parse_row_format()
2864
2865 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
2866 index = self._index
2867 with_ = with_ or self._match_text_seq("WITH")
2868
2869 if not self._match(TokenType.SERDE_PROPERTIES):
2870 self._retreat(index)
2871 return None
2872 return self.expression(
2873 exp.SerdeProperties,
2874 **{ # type: ignore
2875 "expressions": self._parse_wrapped_properties(),
2876 "with": with_,
2877 },
2878 )
2879
2880 def _parse_row_format(
2881 self, match_row: bool = False
2882 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
2883 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
2884 return None
2885
2886 if self._match_text_seq("SERDE"):
2887 this = self._parse_string()
2888
2889 serde_properties = self._parse_serde_properties()
2890
2891 return self.expression(
2892 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
2893 )
2894
2895 self._match_text_seq("DELIMITED")
2896
2897 kwargs = {}
2898
2899 if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
2900 kwargs["fields"] = self._parse_string()
2901 if self._match_text_seq("ESCAPED", "BY"):
2902 kwargs["escaped"] = self._parse_string()
2903 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
2904 kwargs["collection_items"] = self._parse_string()
2905 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
2906 kwargs["map_keys"] = self._parse_string()
2907 if self._match_text_seq("LINES", "TERMINATED", "BY"):
2908 kwargs["lines"] = self._parse_string()
2909 if self._match_text_seq("NULL", "DEFINED", "AS"):
2910 kwargs["null"] = self._parse_string()
2911
2912 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore
2913
2914 def _parse_load(self) -> exp.LoadData | exp.Command:
2915 if self._match_text_seq("DATA"):
2916 local = self._match_text_seq("LOCAL")
2917 self._match_text_seq("INPATH")
2918 inpath = self._parse_string()
2919 overwrite = self._match(TokenType.OVERWRITE)
2920 self._match_pair(TokenType.INTO, TokenType.TABLE)
2921
2922 return self.expression(
2923 exp.LoadData,
2924 this=self._parse_table(schema=True),
2925 local=local,
2926 overwrite=overwrite,
2927 inpath=inpath,
2928 partition=self._parse_partition(),
2929 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
2930 serde=self._match_text_seq("SERDE") and self._parse_string(),
2931 )
2932 return self._parse_as_command(self._prev)
2933
2934 def _parse_delete(self) -> exp.Delete:
2935 # This handles MySQL's "Multiple-Table Syntax"
2936 # https://dev.mysql.com/doc/refman/8.0/en/delete.html
2937 tables = None
2938 if not self._match(TokenType.FROM, advance=False):
2939 tables = self._parse_csv(self._parse_table) or None
2940
2941 returning = self._parse_returning()
2942
2943 return self.expression(
2944 exp.Delete,
2945 tables=tables,
2946 this=self._match(TokenType.FROM) and self._parse_table(joins=True),
2947 using=self._match(TokenType.USING) and self._parse_table(joins=True),
2948 cluster=self._match(TokenType.ON) and self._parse_on_property(),
2949 where=self._parse_where(),
2950 returning=returning or self._parse_returning(),
2951 limit=self._parse_limit(),
2952 )
2953
2954 def _parse_update(self) -> exp.Update:
2955 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
2956 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
2957 returning = self._parse_returning()
2958 return self.expression(
2959 exp.Update,
2960 **{ # type: ignore
2961 "this": this,
2962 "expressions": expressions,
2963 "from": self._parse_from(joins=True),
2964 "where": self._parse_where(),
2965 "returning": returning or self._parse_returning(),
2966 "order": self._parse_order(),
2967 "limit": self._parse_limit(),
2968 },
2969 )
2970
2971 def _parse_uncache(self) -> exp.Uncache:
2972 if not self._match(TokenType.TABLE):
2973 self.raise_error("Expecting TABLE after UNCACHE")
2974
2975 return self.expression(
2976 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
2977 )
2978
2979 def _parse_cache(self) -> exp.Cache:
2980 lazy = self._match_text_seq("LAZY")
2981 self._match(TokenType.TABLE)
2982 table = self._parse_table(schema=True)
2983
2984 options = []
2985 if self._match_text_seq("OPTIONS"):
2986 self._match_l_paren()
2987 k = self._parse_string()
2988 self._match(TokenType.EQ)
2989 v = self._parse_string()
2990 options = [k, v]
2991 self._match_r_paren()
2992
2993 self._match(TokenType.ALIAS)
2994 return self.expression(
2995 exp.Cache,
2996 this=table,
2997 lazy=lazy,
2998 options=options,
2999 expression=self._parse_select(nested=True),
3000 )
3001
3002 def _parse_partition(self) -> t.Optional[exp.Partition]:
3003 if not self._match_texts(self.PARTITION_KEYWORDS):
3004 return None
3005
3006 return self.expression(
3007 exp.Partition,
3008 subpartition=self._prev.text.upper() == "SUBPARTITION",
3009 expressions=self._parse_wrapped_csv(self._parse_assignment),
3010 )
3011
3012 def _parse_value(self) -> t.Optional[exp.Tuple]:
3013 def _parse_value_expression() -> t.Optional[exp.Expression]:
3014 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
3015 return exp.var(self._prev.text.upper())
3016 return self._parse_expression()
3017
3018 if self._match(TokenType.L_PAREN):
3019 expressions = self._parse_csv(_parse_value_expression)
3020 self._match_r_paren()
3021 return self.expression(exp.Tuple, expressions=expressions)
3022
3023 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
3024 expression = self._parse_expression()
3025 if expression:
3026 return self.expression(exp.Tuple, expressions=[expression])
3027 return None
3028
3029 def _parse_projections(self) -> t.List[exp.Expression]:
3030 return self._parse_expressions()
3031
3032 def _parse_select(
3033 self,
3034 nested: bool = False,
3035 table: bool = False,
3036 parse_subquery_alias: bool = True,
3037 parse_set_operation: bool = True,
3038 ) -> t.Optional[exp.Expression]:
3039 cte = self._parse_with()
3040
3041 if cte:
3042 this = self._parse_statement()
3043
3044 if not this:
3045 self.raise_error("Failed to parse any statement following CTE")
3046 return cte
3047
3048 if "with" in this.arg_types:
3049 this.set("with", cte)
3050 else:
3051 self.raise_error(f"{this.key} does not support CTE")
3052 this = cte
3053
3054 return this
3055
3056 # duckdb supports leading with FROM x
3057 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None
3058
3059 if self._match(TokenType.SELECT):
3060 comments = self._prev_comments
3061
3062 hint = self._parse_hint()
3063
3064 if self._next and not self._next.token_type == TokenType.DOT:
3065 all_ = self._match(TokenType.ALL)
3066 distinct = self._match_set(self.DISTINCT_TOKENS)
3067 else:
3068 all_, distinct = None, None
3069
3070 kind = (
3071 self._match(TokenType.ALIAS)
3072 and self._match_texts(("STRUCT", "VALUE"))
3073 and self._prev.text.upper()
3074 )
3075
3076 if distinct:
3077 distinct = self.expression(
3078 exp.Distinct,
3079 on=self._parse_value() if self._match(TokenType.ON) else None,
3080 )
3081
3082 if all_ and distinct:
3083 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
3084
3085 operation_modifiers = []
3086 while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
3087 operation_modifiers.append(exp.var(self._prev.text.upper()))
3088
3089 limit = self._parse_limit(top=True)
3090 projections = self._parse_projections()
3091
3092 this = self.expression(
3093 exp.Select,
3094 kind=kind,
3095 hint=hint,
3096 distinct=distinct,
3097 expressions=projections,
3098 limit=limit,
3099 operation_modifiers=operation_modifiers or None,
3100 )
3101 this.comments = comments
3102
3103 into = self._parse_into()
3104 if into:
3105 this.set("into", into)
3106
3107 if not from_:
3108 from_ = self._parse_from()
3109
3110 if from_:
3111 this.set("from", from_)
3112
3113 this = self._parse_query_modifiers(this)
3114 elif (table or nested) and self._match(TokenType.L_PAREN):
3115 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
3116 this = self._parse_simplified_pivot(
3117 is_unpivot=self._prev.token_type == TokenType.UNPIVOT
3118 )
3119 elif self._match(TokenType.FROM):
3120 from_ = self._parse_from(skip_from_token=True)
3121 # Support parentheses for duckdb FROM-first syntax
3122 select = self._parse_select()
3123 if select:
3124 select.set("from", from_)
3125 this = select
3126 else:
3127 this = exp.select("*").from_(t.cast(exp.From, from_))
3128 else:
3129 this = (
3130 self._parse_table()
3131 if table
3132 else self._parse_select(nested=True, parse_set_operation=False)
3133 )
3134
3135 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
3136 # in case a modifier (e.g. join) is following
3137 if table and isinstance(this, exp.Values) and this.alias:
3138 alias = this.args["alias"].pop()
3139 this = exp.Table(this=this, alias=alias)
3140
3141 this = self._parse_query_modifiers(self._parse_set_operations(this))
3142
3143 self._match_r_paren()
3144
3145 # We return early here so that the UNION isn't attached to the subquery by the
3146 # following call to _parse_set_operations, but instead becomes the parent node
3147 return self._parse_subquery(this, parse_alias=parse_subquery_alias)
3148 elif self._match(TokenType.VALUES, advance=False):
3149 this = self._parse_derived_table_values()
3150 elif from_:
3151 this = exp.select("*").from_(from_.this, copy=False)
3152 elif self._match(TokenType.SUMMARIZE):
3153 table = self._match(TokenType.TABLE)
3154 this = self._parse_select() or self._parse_string() or self._parse_table()
3155 return self.expression(exp.Summarize, this=this, table=table)
3156 elif self._match(TokenType.DESCRIBE):
3157 this = self._parse_describe()
3158 elif self._match_text_seq("STREAM"):
3159 this = self._parse_function()
3160 if this:
3161 this = self.expression(exp.Stream, this=this)
3162 else:
3163 self._retreat(self._index - 1)
3164 else:
3165 this = None
3166
3167 return self._parse_set_operations(this) if parse_set_operation else this
3168
3169 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
3170 if not skip_with_token and not self._match(TokenType.WITH):
3171 return None
3172
3173 comments = self._prev_comments
3174 recursive = self._match(TokenType.RECURSIVE)
3175
3176 last_comments = None
3177 expressions = []
3178 while True:
3179 expressions.append(self._parse_cte())
3180 if last_comments:
3181 expressions[-1].add_comments(last_comments)
3182
3183 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
3184 break
3185 else:
3186 self._match(TokenType.WITH)
3187
3188 last_comments = self._prev_comments
3189
3190 return self.expression(
3191 exp.With, comments=comments, expressions=expressions, recursive=recursive
3192 )
3193
3194 def _parse_cte(self) -> t.Optional[exp.CTE]:
3195 index = self._index
3196
3197 alias = self._parse_table_alias(self.ID_VAR_TOKENS)
3198 if not alias or not alias.this:
3199 self.raise_error("Expected CTE to have alias")
3200
3201 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
3202 self._retreat(index)
3203 return None
3204
3205 comments = self._prev_comments
3206
3207 if self._match_text_seq("NOT", "MATERIALIZED"):
3208 materialized = False
3209 elif self._match_text_seq("MATERIALIZED"):
3210 materialized = True
3211 else:
3212 materialized = None
3213
3214 cte = self.expression(
3215 exp.CTE,
3216 this=self._parse_wrapped(self._parse_statement),
3217 alias=alias,
3218 materialized=materialized,
3219 comments=comments,
3220 )
3221
3222 if isinstance(cte.this, exp.Values):
3223 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))
3224
3225 return cte
3226
3227 def _parse_table_alias(
3228 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
3229 ) -> t.Optional[exp.TableAlias]:
3230 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
3231 # so this section tries to parse the clause version and if it fails, it treats the token
3232 # as an identifier (alias)
3233 if self._can_parse_limit_or_offset():
3234 return None
3235
3236 any_token = self._match(TokenType.ALIAS)
3237 alias = (
3238 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
3239 or self._parse_string_as_identifier()
3240 )
3241
3242 index = self._index
3243 if self._match(TokenType.L_PAREN):
3244 columns = self._parse_csv(self._parse_function_parameter)
3245 self._match_r_paren() if columns else self._retreat(index)
3246 else:
3247 columns = None
3248
3249 if not alias and not columns:
3250 return None
3251
3252 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)
3253
3254 # We bubble up comments from the Identifier to the TableAlias
3255 if isinstance(alias, exp.Identifier):
3256 table_alias.add_comments(alias.pop_comments())
3257
3258 return table_alias
3259
3260 def _parse_subquery(
3261 self, this: t.Optional[exp.Expression], parse_alias: bool = True
3262 ) -> t.Optional[exp.Subquery]:
3263 if not this:
3264 return None
3265
3266 return self.expression(
3267 exp.Subquery,
3268 this=this,
3269 pivots=self._parse_pivots(),
3270 alias=self._parse_table_alias() if parse_alias else None,
3271 sample=self._parse_table_sample(),
3272 )
3273
3274 def _implicit_unnests_to_explicit(self, this: E) -> E:
3275 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm
3276
3277 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
3278 for i, join in enumerate(this.args.get("joins") or []):
3279 table = join.this
3280 normalized_table = table.copy()
3281 normalized_table.meta["maybe_column"] = True
3282 normalized_table = _norm(normalized_table, dialect=self.dialect)
3283
3284 if isinstance(table, exp.Table) and not join.args.get("on"):
3285 if normalized_table.parts[0].name in refs:
3286 table_as_column = table.to_column()
3287 unnest = exp.Unnest(expressions=[table_as_column])
3288
3289 # Table.to_column creates a parent Alias node that we want to convert to
3290 # a TableAlias and attach to the Unnest, so it matches the parser's output
3291 if isinstance(table.args.get("alias"), exp.TableAlias):
3292 table_as_column.replace(table_as_column.this)
3293 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)
3294
3295 table.replace(unnest)
3296
3297 refs.add(normalized_table.alias_or_name)
3298
3299 return this
3300
3301 def _parse_query_modifiers(
3302 self, this: t.Optional[exp.Expression]
3303 ) -> t.Optional[exp.Expression]:
3304 if isinstance(this, (exp.Query, exp.Table)):
3305 for join in self._parse_joins():
3306 this.append("joins", join)
3307 for lateral in iter(self._parse_lateral, None):
3308 this.append("laterals", lateral)
3309
3310 while True:
3311 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
3312 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
3313 key, expression = parser(self)
3314
3315 if expression:
3316 this.set(key, expression)
3317 if key == "limit":
3318 offset = expression.args.pop("offset", None)
3319
3320 if offset:
3321 offset = exp.Offset(expression=offset)
3322 this.set("offset", offset)
3323
3324 limit_by_expressions = expression.expressions
3325 expression.set("expressions", None)
3326 offset.set("expressions", limit_by_expressions)
3327 continue
3328 break
3329
3330 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
3331 this = self._implicit_unnests_to_explicit(this)
3332
3333 return this
3334
3335 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
3336 start = self._curr
3337 while self._curr:
3338 self._advance()
3339
3340 end = self._tokens[self._index - 1]
3341 return exp.Hint(expressions=[self._find_sql(start, end)])
3342
3343 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
3344 return self._parse_function_call()
3345
3346 def _parse_hint_body(self) -> t.Optional[exp.Hint]:
3347 start_index = self._index
3348 should_fallback_to_string = False
3349
3350 hints = []
3351 try:
3352 for hint in iter(
3353 lambda: self._parse_csv(
3354 lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
3355 ),
3356 [],
3357 ):
3358 hints.extend(hint)
3359 except ParseError:
3360 should_fallback_to_string = True
3361
3362 if should_fallback_to_string or self._curr:
3363 self._retreat(start_index)
3364 return self._parse_hint_fallback_to_string()
3365
3366 return self.expression(exp.Hint, expressions=hints)
3367
3368 def _parse_hint(self) -> t.Optional[exp.Hint]:
3369 if self._match(TokenType.HINT) and self._prev_comments:
3370 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)
3371
3372 return None
3373
3374 def _parse_into(self) -> t.Optional[exp.Into]:
3375 if not self._match(TokenType.INTO):
3376 return None
3377
3378 temp = self._match(TokenType.TEMPORARY)
3379 unlogged = self._match_text_seq("UNLOGGED")
3380 self._match(TokenType.TABLE)
3381
3382 return self.expression(
3383 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
3384 )
3385
3386 def _parse_from(
3387 self, joins: bool = False, skip_from_token: bool = False
3388 ) -> t.Optional[exp.From]:
3389 if not skip_from_token and not self._match(TokenType.FROM):
3390 return None
3391
3392 return self.expression(
3393 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
3394 )
3395
3396 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
3397 return self.expression(
3398 exp.MatchRecognizeMeasure,
3399 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
3400 this=self._parse_expression(),
3401 )
3402
3403 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
3404 if not self._match(TokenType.MATCH_RECOGNIZE):
3405 return None
3406
3407 self._match_l_paren()
3408
3409 partition = self._parse_partition_by()
3410 order = self._parse_order()
3411
3412 measures = (
3413 self._parse_csv(self._parse_match_recognize_measure)
3414 if self._match_text_seq("MEASURES")
3415 else None
3416 )
3417
3418 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
3419 rows = exp.var("ONE ROW PER MATCH")
3420 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
3421 text = "ALL ROWS PER MATCH"
3422 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
3423 text += " SHOW EMPTY MATCHES"
3424 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
3425 text += " OMIT EMPTY MATCHES"
3426 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
3427 text += " WITH UNMATCHED ROWS"
3428 rows = exp.var(text)
3429 else:
3430 rows = None
3431
3432 if self._match_text_seq("AFTER", "MATCH", "SKIP"):
3433 text = "AFTER MATCH SKIP"
3434 if self._match_text_seq("PAST", "LAST", "ROW"):
3435 text += " PAST LAST ROW"
3436 elif self._match_text_seq("TO", "NEXT", "ROW"):
3437 text += " TO NEXT ROW"
3438 elif self._match_text_seq("TO", "FIRST"):
3439 text += f" TO FIRST {self._advance_any().text}" # type: ignore
3440 elif self._match_text_seq("TO", "LAST"):
3441 text += f" TO LAST {self._advance_any().text}" # type: ignore
3442 after = exp.var(text)
3443 else:
3444 after = None
3445
3446 if self._match_text_seq("PATTERN"):
3447 self._match_l_paren()
3448
3449 if not self._curr:
3450 self.raise_error("Expecting )", self._curr)
3451
3452 paren = 1
3453 start = self._curr
3454
3455 while self._curr and paren > 0:
3456 if self._curr.token_type == TokenType.L_PAREN:
3457 paren += 1
3458 if self._curr.token_type == TokenType.R_PAREN:
3459 paren -= 1
3460
3461 end = self._prev
3462 self._advance()
3463
3464 if paren > 0:
3465 self.raise_error("Expecting )", self._curr)
3466
3467 pattern = exp.var(self._find_sql(start, end))
3468 else:
3469 pattern = None
3470
3471 define = (
3472 self._parse_csv(self._parse_name_as_expression)
3473 if self._match_text_seq("DEFINE")
3474 else None
3475 )
3476
3477 self._match_r_paren()
3478
3479 return self.expression(
3480 exp.MatchRecognize,
3481 partition_by=partition,
3482 order=order,
3483 measures=measures,
3484 rows=rows,
3485 after=after,
3486 pattern=pattern,
3487 define=define,
3488 alias=self._parse_table_alias(),
3489 )
3490
3491 def _parse_lateral(self) -> t.Optional[exp.Lateral]:
3492 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
3493 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
3494 cross_apply = False
3495
3496 if cross_apply is not None:
3497 this = self._parse_select(table=True)
3498 view = None
3499 outer = None
3500 elif self._match(TokenType.LATERAL):
3501 this = self._parse_select(table=True)
3502 view = self._match(TokenType.VIEW)
3503 outer = self._match(TokenType.OUTER)
3504 else:
3505 return None
3506
3507 if not this:
3508 this = (
3509 self._parse_unnest()
3510 or self._parse_function()
3511 or self._parse_id_var(any_token=False)
3512 )
3513
3514 while self._match(TokenType.DOT):
3515 this = exp.Dot(
3516 this=this,
3517 expression=self._parse_function() or self._parse_id_var(any_token=False),
3518 )
3519
3520 if view:
3521 table = self._parse_id_var(any_token=False)
3522 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
3523 table_alias: t.Optional[exp.TableAlias] = self.expression(
3524 exp.TableAlias, this=table, columns=columns
3525 )
3526 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
3527 # We move the alias from the lateral's child node to the lateral itself
3528 table_alias = this.args["alias"].pop()
3529 else:
3530 table_alias = self._parse_table_alias()
3531
3532 return self.expression(
3533 exp.Lateral,
3534 this=this,
3535 view=view,
3536 outer=outer,
3537 alias=table_alias,
3538 cross_apply=cross_apply,
3539 )
3540
3541 def _parse_join_parts(
3542 self,
3543 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
3544 return (
3545 self._match_set(self.JOIN_METHODS) and self._prev,
3546 self._match_set(self.JOIN_SIDES) and self._prev,
3547 self._match_set(self.JOIN_KINDS) and self._prev,
3548 )
3549
3550 def _parse_using_identifiers(self) -> t.List[exp.Expression]:
3551 def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
3552 this = self._parse_column()
3553 if isinstance(this, exp.Column):
3554 return this.this
3555 return this
3556
3557 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
3558
3559 def _parse_join(
3560 self, skip_join_token: bool = False, parse_bracket: bool = False
3561 ) -> t.Optional[exp.Join]:
3562 if self._match(TokenType.COMMA):
3563 return self.expression(exp.Join, this=self._parse_table())
3564
3565 index = self._index
3566 method, side, kind = self._parse_join_parts()
3567 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
3568 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
3569
3570 if not skip_join_token and not join:
3571 self._retreat(index)
3572 kind = None
3573 method = None
3574 side = None
3575
3576 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
3577 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)
3578
3579 if not skip_join_token and not join and not outer_apply and not cross_apply:
3580 return None
3581
3582 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
3583 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
3584 kwargs["expressions"] = self._parse_csv(
3585 lambda: self._parse_table(parse_bracket=parse_bracket)
3586 )
3587
3588 if method:
3589 kwargs["method"] = method.text
3590 if side:
3591 kwargs["side"] = side.text
3592 if kind:
3593 kwargs["kind"] = kind.text
3594 if hint:
3595 kwargs["hint"] = hint
3596
3597 if self._match(TokenType.MATCH_CONDITION):
3598 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)
3599
3600 if self._match(TokenType.ON):
3601 kwargs["on"] = self._parse_assignment()
3602 elif self._match(TokenType.USING):
3603 kwargs["using"] = self._parse_using_identifiers()
3604 elif (
3605 not (outer_apply or cross_apply)
3606 and not isinstance(kwargs["this"], exp.Unnest)
3607 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
3608 ):
3609 index = self._index
3610 joins: t.Optional[list] = list(self._parse_joins())
3611
3612 if joins and self._match(TokenType.ON):
3613 kwargs["on"] = self._parse_assignment()
3614 elif joins and self._match(TokenType.USING):
3615 kwargs["using"] = self._parse_using_identifiers()
3616 else:
3617 joins = None
3618 self._retreat(index)
3619
3620 kwargs["this"].set("joins", joins if joins else None)
3621
3622 comments = [c for token in (method, side, kind) if token for c in token.comments]
3623 return self.expression(exp.Join, comments=comments, **kwargs)
3624
3625 def _parse_opclass(self) -> t.Optional[exp.Expression]:
3626 this = self._parse_assignment()
3627
3628 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
3629 return this
3630
3631 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
3632 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())
3633
3634 return this
3635
3636 def _parse_index_params(self) -> exp.IndexParameters:
3637 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
3638
3639 if self._match(TokenType.L_PAREN, advance=False):
3640 columns = self._parse_wrapped_csv(self._parse_with_operator)
3641 else:
3642 columns = None
3643
3644 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
3645 partition_by = self._parse_partition_by()
3646 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
3647 tablespace = (
3648 self._parse_var(any_token=True)
3649 if self._match_text_seq("USING", "INDEX", "TABLESPACE")
3650 else None
3651 )
3652 where = self._parse_where()
3653
3654 on = self._parse_field() if self._match(TokenType.ON) else None
3655
3656 return self.expression(
3657 exp.IndexParameters,
3658 using=using,
3659 columns=columns,
3660 include=include,
3661 partition_by=partition_by,
3662 where=where,
3663 with_storage=with_storage,
3664 tablespace=tablespace,
3665 on=on,
3666 )
3667
3668 def _parse_index(
3669 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
3670 ) -> t.Optional[exp.Index]:
3671 if index or anonymous:
3672 unique = None
3673 primary = None
3674 amp = None
3675
3676 self._match(TokenType.ON)
3677 self._match(TokenType.TABLE) # hive
3678 table = self._parse_table_parts(schema=True)
3679 else:
3680 unique = self._match(TokenType.UNIQUE)
3681 primary = self._match_text_seq("PRIMARY")
3682 amp = self._match_text_seq("AMP")
3683
3684 if not self._match(TokenType.INDEX):
3685 return None
3686
3687 index = self._parse_id_var()
3688 table = None
3689
3690 params = self._parse_index_params()
3691
3692 return self.expression(
3693 exp.Index,
3694 this=index,
3695 table=table,
3696 unique=unique,
3697 primary=primary,
3698 amp=amp,
3699 params=params,
3700 )
3701
3702 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
3703 hints: t.List[exp.Expression] = []
3704 if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
3705 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
3706 hints.append(
3707 self.expression(
3708 exp.WithTableHint,
3709 expressions=self._parse_csv(
3710 lambda: self._parse_function() or self._parse_var(any_token=True)
3711 ),
3712 )
3713 )
3714 self._match_r_paren()
3715 else:
3716 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
3717 while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
3718 hint = exp.IndexTableHint(this=self._prev.text.upper())
3719
3720 self._match_set((TokenType.INDEX, TokenType.KEY))
3721 if self._match(TokenType.FOR):
3722 hint.set("target", self._advance_any() and self._prev.text.upper())
3723
3724 hint.set("expressions", self._parse_wrapped_id_vars())
3725 hints.append(hint)
3726
3727 return hints or None
3728
3729 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
3730 return (
3731 (not schema and self._parse_function(optional_parens=False))
3732 or self._parse_id_var(any_token=False)
3733 or self._parse_string_as_identifier()
3734 or self._parse_placeholder()
3735 )
3736
3737 def _parse_table_parts(
3738 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
3739 ) -> exp.Table:
3740 catalog = None
3741 db = None
3742 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)
3743
3744 while self._match(TokenType.DOT):
3745 if catalog:
3746 # This allows nesting the table in arbitrarily many dot expressions if needed
3747 table = self.expression(
3748 exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
3749 )
3750 else:
3751 catalog = db
3752 db = table
3753 # "" used for tsql FROM a..b case
3754 table = self._parse_table_part(schema=schema) or ""
3755
3756 if (
3757 wildcard
3758 and self._is_connected()
3759 and (isinstance(table, exp.Identifier) or not table)
3760 and self._match(TokenType.STAR)
3761 ):
3762 if isinstance(table, exp.Identifier):
3763 table.args["this"] += "*"
3764 else:
3765 table = exp.Identifier(this="*")
3766
3767 # We bubble up comments from the Identifier to the Table
3768 comments = table.pop_comments() if isinstance(table, exp.Expression) else None
3769
3770 if is_db_reference:
3771 catalog = db
3772 db = table
3773 table = None
3774
3775 if not table and not is_db_reference:
3776 self.raise_error(f"Expected table name but got {self._curr}")
3777 if not db and is_db_reference:
3778 self.raise_error(f"Expected database name but got {self._curr}")
3779
3780 table = self.expression(
3781 exp.Table,
3782 comments=comments,
3783 this=table,
3784 db=db,
3785 catalog=catalog,
3786 )
3787
3788 changes = self._parse_changes()
3789 if changes:
3790 table.set("changes", changes)
3791
3792 at_before = self._parse_historical_data()
3793 if at_before:
3794 table.set("when", at_before)
3795
3796 pivots = self._parse_pivots()
3797 if pivots:
3798 table.set("pivots", pivots)
3799
3800 return table
3801
3802 def _parse_table(
3803 self,
3804 schema: bool = False,
3805 joins: bool = False,
3806 alias_tokens: t.Optional[t.Collection[TokenType]] = None,
3807 parse_bracket: bool = False,
3808 is_db_reference: bool = False,
3809 parse_partition: bool = False,
3810 ) -> t.Optional[exp.Expression]:
3811 lateral = self._parse_lateral()
3812 if lateral:
3813 return lateral
3814
3815 unnest = self._parse_unnest()
3816 if unnest:
3817 return unnest
3818
3819 values = self._parse_derived_table_values()
3820 if values:
3821 return values
3822
3823 subquery = self._parse_select(table=True)
3824 if subquery:
3825 if not subquery.args.get("pivots"):
3826 subquery.set("pivots", self._parse_pivots())
3827 return subquery
3828
3829 bracket = parse_bracket and self._parse_bracket(None)
3830 bracket = self.expression(exp.Table, this=bracket) if bracket else None
3831
3832 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv