sqlglot.parser
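A quick usage sketch (hedged): this module backs sqlglot's public parsing entry points such as sqlglot.parse_one; the Parser class below is normally driven by a Dialect rather than instantiated directly.

    >>> import sqlglot
    >>> sqlglot.parse_one("SELECT a FROM t").sql()
    'SELECT a FROM t'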
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
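
# Illustration (a sketch, not part of the module): these builders receive the
# already-parsed argument list of a SQL function call. For example, parsing
# JSON_EXTRACT(x, '$.a') goes through build_extract_json_with_path, which
# normalizes the second argument into a dialect-specific JSON path, while
# VAR_MAP with a single star argument short-circuits to exp.StarMap above.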


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
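
    # A minimal usage sketch (assuming the top-level sqlglot.tokenize helper):
    #   >>> import sqlglot
    #   >>> from sqlglot.parser import Parser
    #   >>> Parser(max_errors=1).parse(sqlglot.tokenize("SELECT 1"))
    # returns a list containing a single exp.Select tree.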

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
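
    # Dialect parsers typically extend this table in a subclass rather than
    # patching it, e.g. a sketch (MY_FUNC is a hypothetical function name):
    #
    #   class MyDialect(Dialect):
    #       class Parser(parser.Parser):
    #           FUNCTIONS = {**parser.Parser.FUNCTIONS, "MY_FUNC": exp.Anonymous.from_arg_list}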

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
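
    # E.g. the ARROW entry parses higher-order function arguments such as
    # `x -> x + 1` into exp.Lambda(this=<x + 1>, expressions=[<x>]), and FARROW
    # parses `name => value` keyword arguments into exp.Kwarg.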

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
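
    # This table backs parse_into / parse_one(sql, into=...), e.g. (sketch):
    #   >>> sqlglot.parse_one("x = 1 AND y > 2", into=exp.Condition)
    # parses the text directly as a boolean condition instead of a statement.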

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }
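
    # E.g. a BIT_STRING token (b'1010') becomes exp.BitString and a HEX_STRING
    # token (x'1F') becomes exp.HexString; whether the latter is integer-typed
    # is dialect-dependent (HEX_STRING_IS_INTEGER_TYPE).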

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
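
    # These implement pipe query syntax (BigQuery-style), e.g.:
    #   FROM t |> WHERE x > 0 |> SELECT x
    # Each entry receives the query built so far and returns the extended query.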

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
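
    # E.g. in `CREATE TABLE t (x INT) ENGINE=InnoDB COMMENT='demo'`, the
    # "ENGINE" and "COMMENT" entries produce exp.EngineProperty and
    # exp.SchemaCommentProperty nodes on the resulting Create expression.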

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
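
    # E.g. `x INT NOT NULL DEFAULT 0 CHECK (x >= 0)` hits the "NOT", "DEFAULT"
    # and "CHECK" entries, yielding NotNullColumnConstraint,
    # DefaultColumnConstraint and CheckColumnConstraint nodes on the column.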

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If the keyword is not followed by a parenthesis, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
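
    # E.g. both the Hive-style `PARTITIONED BY (bucket(16, id))` and the
    # Trino-style `PARTITIONED BY (bucket(id, 16))` normalize to
    # exp.PartitionedByBucket(this=id, expression=16) via the literal swap above.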

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
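
    # These cover functions whose argument syntax is not a plain CSV list, e.g.
    # CAST(x AS INT), EXTRACT(YEAR FROM d) and TRIM(BOTH ' ' FROM s), which all
    # need keyword-driven sub-parsers rather than FUNCTIONS-style builders.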

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {}

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether Alter statements are allowed to contain Partition specifications
    ALTER_TABLE_PARTITIONS = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
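
    # Direct-use sketch (most callers go through sqlglot.parse instead):
    #   >>> Parser().parse(Tokenizer().tokenize("SELECT 1; SELECT 2"))
    # yields two exp.Select trees, one per semicolon-separated statement.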
1615 1616 Args: 1617 raw_tokens: The list of tokens. 1618 sql: The original SQL string, used to produce helpful debug messages. 1619 1620 Returns: 1621 The list of the produced syntax trees. 1622 """ 1623 return self._parse( 1624 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1625 ) 1626 1627 def parse_into( 1628 self, 1629 expression_types: exp.IntoType, 1630 raw_tokens: t.List[Token], 1631 sql: t.Optional[str] = None, 1632 ) -> t.List[t.Optional[exp.Expression]]: 1633 """ 1634 Parses a list of tokens into a given Expression type. If a collection of Expression 1635 types is given instead, this method will try to parse the token list into each one 1636 of them, stopping at the first for which the parsing succeeds. 1637 1638 Args: 1639 expression_types: The expression type(s) to try and parse the token list into. 1640 raw_tokens: The list of tokens. 1641 sql: The original SQL string, used to produce helpful debug messages. 1642 1643 Returns: 1644 The target Expression. 1645 """ 1646 errors = [] 1647 for expression_type in ensure_list(expression_types): 1648 parser = self.EXPRESSION_PARSERS.get(expression_type) 1649 if not parser: 1650 raise TypeError(f"No parser registered for {expression_type}") 1651 1652 try: 1653 return self._parse(parser, raw_tokens, sql) 1654 except ParseError as e: 1655 e.errors[0]["into_expression"] = expression_type 1656 errors.append(e) 1657 1658 raise ParseError( 1659 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1660 errors=merge_errors(errors), 1661 ) from errors[-1] 1662 1663 def _parse( 1664 self, 1665 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1666 raw_tokens: t.List[Token], 1667 sql: t.Optional[str] = None, 1668 ) -> t.List[t.Optional[exp.Expression]]: 1669 self.reset() 1670 self.sql = sql or "" 1671 1672 total = len(raw_tokens) 1673 chunks: t.List[t.List[Token]] = [[]] 1674 1675 for i, token in enumerate(raw_tokens): 1676 if token.token_type == TokenType.SEMICOLON: 1677 if token.comments: 1678 chunks.append([token]) 1679 1680 if i < total - 1: 1681 chunks.append([]) 1682 else: 1683 chunks[-1].append(token) 1684 1685 expressions = [] 1686 1687 for tokens in chunks: 1688 self._index = -1 1689 self._tokens = tokens 1690 self._advance() 1691 1692 expressions.append(parse_method(self)) 1693 1694 if self._index < len(self._tokens): 1695 self.raise_error("Invalid expression / Unexpected token") 1696 1697 self.check_errors() 1698 1699 return expressions 1700 1701 def check_errors(self) -> None: 1702 """Logs or raises any found errors, depending on the chosen error level setting.""" 1703 if self.error_level == ErrorLevel.WARN: 1704 for error in self.errors: 1705 logger.error(str(error)) 1706 elif self.error_level == ErrorLevel.RAISE and self.errors: 1707 raise ParseError( 1708 concat_messages(self.errors, self.max_errors), 1709 errors=merge_errors(self.errors), 1710 ) 1711 1712 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1713 """ 1714 Appends an error in the list of recorded errors or raises it, depending on the chosen 1715 error level setting. 1716 """ 1717 token = token or self._curr or self._prev or Token.string("") 1718 start = token.start 1719 end = token.end + 1 1720 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1721 highlight = self.sql[start:end] 1722 end_context = self.sql[end : end + self.error_message_context] 1723 1724 error = ParseError.new( 1725 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1726 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1727 description=message, 1728 line=token.line, 1729 col=token.col, 1730 start_context=start_context, 1731 highlight=highlight, 1732 end_context=end_context, 1733 ) 1734 1735 if self.error_level == ErrorLevel.IMMEDIATE: 1736 raise error 1737 1738 self.errors.append(error) 1739 1740 def expression( 1741 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1742 ) -> E: 1743 """ 1744 Creates a new, validated Expression. 1745 1746 Args: 1747 exp_class: The expression class to instantiate. 1748 comments: An optional list of comments to attach to the expression. 1749 kwargs: The arguments to set for the expression along with their respective values. 1750 1751 Returns: 1752 The target expression. 1753 """ 1754 instance = exp_class(**kwargs) 1755 instance.add_comments(comments) if comments else self._add_comments(instance) 1756 return self.validate_expression(instance) 1757 1758 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1759 if expression and self._prev_comments: 1760 expression.add_comments(self._prev_comments) 1761 self._prev_comments = None 1762 1763 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1764 """ 1765 Validates an Expression, making sure that all its mandatory arguments are set. 1766 1767 Args: 1768 expression: The expression to validate. 1769 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1770 1771 Returns: 1772 The validated expression. 1773 """ 1774 if self.error_level != ErrorLevel.IGNORE: 1775 for error_message in expression.error_messages(args): 1776 self.raise_error(error_message) 1777 1778 return expression 1779 1780 def _find_sql(self, start: Token, end: Token) -> str: 1781 return self.sql[start.start : end.end + 1] 1782 1783 def _is_connected(self) -> bool: 1784 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1785 1786 def _advance(self, times: int = 1) -> None: 1787 self._index += times 1788 self._curr = seq_get(self._tokens, self._index) 1789 self._next = seq_get(self._tokens, self._index + 1) 1790 1791 if self._index > 0: 1792 self._prev = self._tokens[self._index - 1] 1793 self._prev_comments = self._prev.comments 1794 else: 1795 self._prev = None 1796 self._prev_comments = None 1797 1798 def _retreat(self, index: int) -> None: 1799 if index != self._index: 1800 self._advance(index - self._index) 1801 1802 def _warn_unsupported(self) -> None: 1803 if len(self._tokens) <= 1: 1804 return 1805 1806 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1807 # interested in emitting a warning for the one being currently processed. 1808 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1809 1810 logger.warning( 1811 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1812 ) 1813 1814 def _parse_command(self) -> exp.Command: 1815 self._warn_unsupported() 1816 return self.expression( 1817 exp.Command, 1818 comments=self._prev_comments, 1819 this=self._prev.text.upper(), 1820 expression=self._parse_string(), 1821 ) 1822 1823 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1824 """ 1825 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1826 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1827 solve this by setting & resetting the parser state accordingly. 1828 """ 1829 index = self._index 1830 error_level = self.error_level 1831 1832 self.error_level = ErrorLevel.IMMEDIATE 1833 try: 1834 this = parse_method() 1835 except ParseError: 1836 this = None 1837 finally: 1838 if not this or retreat: 1839 self._retreat(index) 1840 self.error_level = error_level 1841 1842 return this 1843 1844 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1845 start = self._prev 1846 exists = self._parse_exists() if allow_exists else None 1847 1848 self._match(TokenType.ON) 1849 1850 materialized = self._match_text_seq("MATERIALIZED") 1851 kind = self._match_set(self.CREATABLES) and self._prev 1852 if not kind: 1853 return self._parse_as_command(start) 1854 1855 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1856 this = self._parse_user_defined_function(kind=kind.token_type) 1857 elif kind.token_type == TokenType.TABLE: 1858 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1859 elif kind.token_type == TokenType.COLUMN: 1860 this = self._parse_column() 1861 else: 1862 this = self._parse_id_var() 1863 1864 self._match(TokenType.IS) 1865 1866 return self.expression( 1867 exp.Comment, 1868 this=this, 1869 kind=kind.text, 1870 expression=self._parse_string(), 1871 exists=exists, 1872 materialized=materialized, 1873 ) 1874 1875 def _parse_to_table( 1876 self, 1877 ) -> exp.ToTableProperty: 1878 table = self._parse_table_parts(schema=True) 1879 return self.expression(exp.ToTableProperty, this=table) 1880 1881 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1882 def _parse_ttl(self) -> exp.Expression: 1883 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1884 this = self._parse_bitwise() 1885 1886 if self._match_text_seq("DELETE"): 1887 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1888 if self._match_text_seq("RECOMPRESS"): 1889 return self.expression( 1890 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1891 ) 1892 if self._match_text_seq("TO", "DISK"): 1893 return self.expression( 1894 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1895 ) 1896 if self._match_text_seq("TO", "VOLUME"): 1897 return self.expression( 1898 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1899 ) 1900 1901 return this 1902 1903 expressions = self._parse_csv(_parse_ttl_action) 1904 where = self._parse_where() 1905 group = self._parse_group() 1906 1907 aggregates = None 1908 if group and self._match(TokenType.SET): 1909 aggregates = self._parse_csv(self._parse_set_item) 1910 1911 return self.expression( 1912 exp.MergeTreeTTL, 1913 expressions=expressions, 1914 where=where, 1915 group=group, 1916 aggregates=aggregates, 1917 ) 1918 1919 def _parse_statement(self) -> t.Optional[exp.Expression]: 1920 if self._curr is None: 1921 return None 1922 1923 if self._match_set(self.STATEMENT_PARSERS): 1924 comments = self._prev_comments 1925 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1926 stmt.add_comments(comments, prepend=True) 1927 return stmt 1928 1929 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1930 return self._parse_command() 1931 1932 expression = self._parse_expression() 1933 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1934 return
self._parse_query_modifiers(expression) 1935 1936 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1937 start = self._prev 1938 temporary = self._match(TokenType.TEMPORARY) 1939 materialized = self._match_text_seq("MATERIALIZED") 1940 1941 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1942 if not kind: 1943 return self._parse_as_command(start) 1944 1945 concurrently = self._match_text_seq("CONCURRENTLY") 1946 if_exists = exists or self._parse_exists() 1947 1948 if kind == "COLUMN": 1949 this = self._parse_column() 1950 else: 1951 this = self._parse_table_parts( 1952 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1953 ) 1954 1955 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1956 1957 if self._match(TokenType.L_PAREN, advance=False): 1958 expressions = self._parse_wrapped_csv(self._parse_types) 1959 else: 1960 expressions = None 1961 1962 return self.expression( 1963 exp.Drop, 1964 exists=if_exists, 1965 this=this, 1966 expressions=expressions, 1967 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1968 temporary=temporary, 1969 materialized=materialized, 1970 cascade=self._match_text_seq("CASCADE"), 1971 constraints=self._match_text_seq("CONSTRAINTS"), 1972 purge=self._match_text_seq("PURGE"), 1973 cluster=cluster, 1974 concurrently=concurrently, 1975 ) 1976 1977 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1978 return ( 1979 self._match_text_seq("IF") 1980 and (not not_ or self._match(TokenType.NOT)) 1981 and self._match(TokenType.EXISTS) 1982 ) 1983 1984 def _parse_create(self) -> exp.Create | exp.Command: 1985 # Note: this can't be None because we've matched a statement parser 1986 start = self._prev 1987 1988 replace = ( 1989 start.token_type == TokenType.REPLACE 1990 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1991 or self._match_pair(TokenType.OR, TokenType.ALTER) 1992 ) 1993 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1994 1995 unique = self._match(TokenType.UNIQUE) 1996 1997 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1998 clustered = True 1999 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2000 "COLUMNSTORE" 2001 ): 2002 clustered = False 2003 else: 2004 clustered = None 2005 2006 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2007 self._advance() 2008 2009 properties = None 2010 create_token = self._match_set(self.CREATABLES) and self._prev 2011 2012 if not create_token: 2013 # exp.Properties.Location.POST_CREATE 2014 properties = self._parse_properties() 2015 create_token = self._match_set(self.CREATABLES) and self._prev 2016 2017 if not properties or not create_token: 2018 return self._parse_as_command(start) 2019 2020 concurrently = self._match_text_seq("CONCURRENTLY") 2021 exists = self._parse_exists(not_=True) 2022 this = None 2023 expression: t.Optional[exp.Expression] = None 2024 indexes = None 2025 no_schema_binding = None 2026 begin = None 2027 end = None 2028 clone = None 2029 2030 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2031 nonlocal properties 2032 if properties and temp_props: 2033 properties.expressions.extend(temp_props.expressions) 2034 elif temp_props: 2035 properties = temp_props 2036 2037 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2038 this = self._parse_user_defined_function(kind=create_token.token_type) 2039 2040 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2041 extend_props(self._parse_properties()) 2042 2043 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2044 extend_props(self._parse_properties()) 2045 2046 if not expression: 2047 if self._match(TokenType.COMMAND): 2048 expression = self._parse_as_command(self._prev) 2049 else: 2050 begin = self._match(TokenType.BEGIN) 2051 return_ = self._match_text_seq("RETURN") 2052 2053 if self._match(TokenType.STRING, advance=False): 2054 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2055 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2056 expression = self._parse_string() 2057 extend_props(self._parse_properties()) 2058 else: 2059 expression = self._parse_user_defined_function_expression() 2060 2061 end = self._match_text_seq("END") 2062 2063 if return_: 2064 expression = self.expression(exp.Return, this=expression) 2065 elif create_token.token_type == TokenType.INDEX: 2066 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2067 if not self._match(TokenType.ON): 2068 index = self._parse_id_var() 2069 anonymous = False 2070 else: 2071 index = None 2072 anonymous = True 2073 2074 this = self._parse_index(index=index, anonymous=anonymous) 2075 elif create_token.token_type in self.DB_CREATABLES: 2076 table_parts = self._parse_table_parts( 2077 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2078 ) 2079 2080 # exp.Properties.Location.POST_NAME 2081 self._match(TokenType.COMMA) 2082 extend_props(self._parse_properties(before=True)) 2083 2084 this = self._parse_schema(this=table_parts) 2085 2086 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2087 extend_props(self._parse_properties()) 2088 2089 has_alias = self._match(TokenType.ALIAS) 2090 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2091 # exp.Properties.Location.POST_ALIAS 2092 extend_props(self._parse_properties()) 2093 2094 if create_token.token_type == TokenType.SEQUENCE: 2095 expression = self._parse_types() 2096 props = self._parse_properties() 2097 if props: 2098 sequence_props = exp.SequenceProperties() 2099 options = [] 2100 for prop in props: 2101 if isinstance(prop, exp.SequenceProperties): 2102 for arg, value in prop.args.items(): 2103 if arg == "options": 2104 options.extend(value) 2105 else: 2106 sequence_props.set(arg, value) 2107 prop.pop() 2108 2109 if options: 2110 sequence_props.set("options", options) 2111 2112 props.append("expressions", sequence_props) 2113 extend_props(props) 2114 else: 2115 expression = self._parse_ddl_select() 2116 2117 # Some dialects also support using a table as an alias instead of a SELECT. 2118 # Here we fall back to this as an alternative.
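            # Illustrative sketch, assuming only the public parse_one API: a plain CTAS
            # statement takes the _parse_ddl_select() branch above and round-trips as is:
            #
            #     >>> import sqlglot
            #     >>> sqlglot.parse_one("CREATE TABLE t AS SELECT 1 AS c").sql()
            #     'CREATE TABLE t AS SELECT 1 AS c'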
2119 if not expression and has_alias: 2120 expression = self._try_parse(self._parse_table_parts) 2121 2122 if create_token.token_type == TokenType.TABLE: 2123 # exp.Properties.Location.POST_EXPRESSION 2124 extend_props(self._parse_properties()) 2125 2126 indexes = [] 2127 while True: 2128 index = self._parse_index() 2129 2130 # exp.Properties.Location.POST_INDEX 2131 extend_props(self._parse_properties()) 2132 if not index: 2133 break 2134 else: 2135 self._match(TokenType.COMMA) 2136 indexes.append(index) 2137 elif create_token.token_type == TokenType.VIEW: 2138 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2139 no_schema_binding = True 2140 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2141 extend_props(self._parse_properties()) 2142 2143 shallow = self._match_text_seq("SHALLOW") 2144 2145 if self._match_texts(self.CLONE_KEYWORDS): 2146 copy = self._prev.text.lower() == "copy" 2147 clone = self.expression( 2148 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2149 ) 2150 2151 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2152 return self._parse_as_command(start) 2153 2154 create_kind_text = create_token.text.upper() 2155 return self.expression( 2156 exp.Create, 2157 this=this, 2158 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2159 replace=replace, 2160 refresh=refresh, 2161 unique=unique, 2162 expression=expression, 2163 exists=exists, 2164 properties=properties, 2165 indexes=indexes, 2166 no_schema_binding=no_schema_binding, 2167 begin=begin, 2168 end=end, 2169 clone=clone, 2170 concurrently=concurrently, 2171 clustered=clustered, 2172 ) 2173 2174 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2175 seq = exp.SequenceProperties() 2176 2177 options = [] 2178 index = self._index 2179 2180 while self._curr: 2181 self._match(TokenType.COMMA) 2182 if self._match_text_seq("INCREMENT"): 2183 self._match_text_seq("BY") 2184 self._match_text_seq("=") 2185 seq.set("increment", self._parse_term()) 2186 elif self._match_text_seq("MINVALUE"): 2187 seq.set("minvalue", self._parse_term()) 2188 elif self._match_text_seq("MAXVALUE"): 2189 seq.set("maxvalue", self._parse_term()) 2190 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2191 self._match_text_seq("=") 2192 seq.set("start", self._parse_term()) 2193 elif self._match_text_seq("CACHE"): 2194 # T-SQL allows empty CACHE which is initialized dynamically 2195 seq.set("cache", self._parse_number() or True) 2196 elif self._match_text_seq("OWNED", "BY"): 2197 # "OWNED BY NONE" is the default 2198 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2199 else: 2200 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2201 if opt: 2202 options.append(opt) 2203 else: 2204 break 2205 2206 seq.set("options", options if options else None) 2207 return None if self._index == index else seq 2208 2209 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2210 # only used for teradata currently 2211 self._match(TokenType.COMMA) 2212 2213 kwargs = { 2214 "no": self._match_text_seq("NO"), 2215 "dual": self._match_text_seq("DUAL"), 2216 "before": self._match_text_seq("BEFORE"), 2217 "default": self._match_text_seq("DEFAULT"), 2218 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2219 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2220 "after": self._match_text_seq("AFTER"), 2221 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2222 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2223 } 2224 2225 if self._match_texts(self.PROPERTY_PARSERS): 2226 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2227 try: 2228 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2229 except TypeError: 2230 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2231 2232 return None 2233 2234 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2235 return self._parse_wrapped_csv(self._parse_property) 2236 2237 def _parse_property(self) -> t.Optional[exp.Expression]: 2238 if self._match_texts(self.PROPERTY_PARSERS): 2239 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2240 2241 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2242 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2243 2244 if self._match_text_seq("COMPOUND", "SORTKEY"): 2245 return self._parse_sortkey(compound=True) 2246 2247 if self._match_text_seq("SQL", "SECURITY"): 2248 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2249 2250 index = self._index 2251 2252 seq_props = self._parse_sequence_properties() 2253 if seq_props: 2254 return seq_props 2255 2256 self._retreat(index) 2257 key = self._parse_column() 2258 2259 if not self._match(TokenType.EQ): 2260 self._retreat(index) 2261 return None 2262 2263 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2264 if isinstance(key, exp.Column): 2265 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2266 2267 value = self._parse_bitwise() or self._parse_var(any_token=True) 2268 2269 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2270 if isinstance(value, exp.Column): 2271 value = exp.var(value.name) 2272 2273 return self.expression(exp.Property, this=key, value=value) 2274 2275 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2276 if self._match_text_seq("BY"): 2277 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2278 2279 self._match(TokenType.ALIAS) 2280 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2281 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2282 2283 return self.expression( 2284 exp.FileFormatProperty, 2285 this=( 2286 self.expression( 2287 exp.InputOutputFormat, 2288 input_format=input_format, 2289 output_format=output_format, 2290 ) 2291 if input_format or output_format 2292 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2293 ), 2294 hive_format=True, 2295 ) 2296 2297 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2298 field = self._parse_field() 2299 if isinstance(field, exp.Identifier) and not field.quoted: 2300 field = exp.var(field) 2301 2302 return field 2303 2304 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2305 self._match(TokenType.EQ) 2306 self._match(TokenType.ALIAS) 2307 2308 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2309 2310 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2311 properties = [] 2312 while True: 2313 if before: 2314 prop = self._parse_property_before() 2315 else: 2316 prop = self._parse_property() 2317 if not prop: 2318 break 2319 for p in ensure_list(prop): 2320 properties.append(p) 
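                    # Note: ensure_list is used because a property parser may return either
                    # a single node or a list of nodes. Illustrative check, assuming the
                    # public parse_one API (exact property support varies by dialect):
                    #
                    #     >>> import sqlglot
                    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (c INT) WITH (foo='bar')")
                    #     >>> len(ddl.args["properties"].expressions)
                    #     1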
2321 2322 if properties: 2323 return self.expression(exp.Properties, expressions=properties) 2324 2325 return None 2326 2327 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2328 return self.expression( 2329 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2330 ) 2331 2332 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2333 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2334 security_specifier = self._prev.text.upper() 2335 return self.expression(exp.SecurityProperty, this=security_specifier) 2336 return None 2337 2338 def _parse_settings_property(self) -> exp.SettingsProperty: 2339 return self.expression( 2340 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2341 ) 2342 2343 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2344 if self._index >= 2: 2345 pre_volatile_token = self._tokens[self._index - 2] 2346 else: 2347 pre_volatile_token = None 2348 2349 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2350 return exp.VolatileProperty() 2351 2352 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2353 2354 def _parse_retention_period(self) -> exp.Var: 2355 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2356 number = self._parse_number() 2357 number_str = f"{number} " if number else "" 2358 unit = self._parse_var(any_token=True) 2359 return exp.var(f"{number_str}{unit}") 2360 2361 def _parse_system_versioning_property( 2362 self, with_: bool = False 2363 ) -> exp.WithSystemVersioningProperty: 2364 self._match(TokenType.EQ) 2365 prop = self.expression( 2366 exp.WithSystemVersioningProperty, 2367 **{ # type: ignore 2368 "on": True, 2369 "with": with_, 2370 }, 2371 ) 2372 2373 if self._match_text_seq("OFF"): 2374 prop.set("on", False) 2375 return prop 2376 2377 self._match(TokenType.ON) 2378 if self._match(TokenType.L_PAREN): 2379 while self._curr and not self._match(TokenType.R_PAREN): 2380 if self._match_text_seq("HISTORY_TABLE", "="): 2381 prop.set("this", self._parse_table_parts()) 2382 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2383 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2384 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2385 prop.set("retention_period", self._parse_retention_period()) 2386 2387 self._match(TokenType.COMMA) 2388 2389 return prop 2390 2391 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2392 self._match(TokenType.EQ) 2393 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2394 prop = self.expression(exp.DataDeletionProperty, on=on) 2395 2396 if self._match(TokenType.L_PAREN): 2397 while self._curr and not self._match(TokenType.R_PAREN): 2398 if self._match_text_seq("FILTER_COLUMN", "="): 2399 prop.set("filter_column", self._parse_column()) 2400 elif self._match_text_seq("RETENTION_PERIOD", "="): 2401 prop.set("retention_period", self._parse_retention_period()) 2402 2403 self._match(TokenType.COMMA) 2404 2405 return prop 2406 2407 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2408 kind = "HASH" 2409 expressions: t.Optional[t.List[exp.Expression]] = None 2410 if self._match_text_seq("BY", "HASH"): 2411 expressions = self._parse_wrapped_csv(self._parse_id_var) 2412 elif self._match_text_seq("BY", "RANDOM"): 2413 kind = "RANDOM" 2414 2415 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2416 
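        # For reference, the shapes this helper accepts in dialects that route
        # DISTRIBUTED BY here (Doris-style DDL; illustrative, not exhaustive):
        #     DISTRIBUTED BY HASH (k1, k2) BUCKETS 32
        #     DISTRIBUTED BY RANDOM BUCKETS AUTO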
buckets: t.Optional[exp.Expression] = None 2417 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2418 buckets = self._parse_number() 2419 2420 return self.expression( 2421 exp.DistributedByProperty, 2422 expressions=expressions, 2423 kind=kind, 2424 buckets=buckets, 2425 order=self._parse_order(), 2426 ) 2427 2428 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2429 self._match_text_seq("KEY") 2430 expressions = self._parse_wrapped_id_vars() 2431 return self.expression(expr_type, expressions=expressions) 2432 2433 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2434 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2435 prop = self._parse_system_versioning_property(with_=True) 2436 self._match_r_paren() 2437 return prop 2438 2439 if self._match(TokenType.L_PAREN, advance=False): 2440 return self._parse_wrapped_properties() 2441 2442 if self._match_text_seq("JOURNAL"): 2443 return self._parse_withjournaltable() 2444 2445 if self._match_texts(self.VIEW_ATTRIBUTES): 2446 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2447 2448 if self._match_text_seq("DATA"): 2449 return self._parse_withdata(no=False) 2450 elif self._match_text_seq("NO", "DATA"): 2451 return self._parse_withdata(no=True) 2452 2453 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2454 return self._parse_serde_properties(with_=True) 2455 2456 if self._match(TokenType.SCHEMA): 2457 return self.expression( 2458 exp.WithSchemaBindingProperty, 2459 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2460 ) 2461 2462 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2463 return self.expression( 2464 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2465 ) 2466 2467 if not self._next: 2468 return None 2469 2470 return self._parse_withisolatedloading() 2471 2472 def _parse_procedure_option(self) -> exp.Expression | None: 2473 if self._match_text_seq("EXECUTE", "AS"): 2474 return self.expression( 2475 exp.ExecuteAsProperty, 2476 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2477 or self._parse_string(), 2478 ) 2479 2480 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2481 2482 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2483 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2484 self._match(TokenType.EQ) 2485 2486 user = self._parse_id_var() 2487 self._match(TokenType.PARAMETER) 2488 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2489 2490 if not user or not host: 2491 return None 2492 2493 return exp.DefinerProperty(this=f"{user}@{host}") 2494 2495 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2496 self._match(TokenType.TABLE) 2497 self._match(TokenType.EQ) 2498 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2499 2500 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2501 return self.expression(exp.LogProperty, no=no) 2502 2503 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2504 return self.expression(exp.JournalProperty, **kwargs) 2505 2506 def _parse_checksum(self) -> exp.ChecksumProperty: 2507 self._match(TokenType.EQ) 2508 2509 on = None 2510 if self._match(TokenType.ON): 2511 on = True 2512 elif self._match_text_seq("OFF"): 2513 on = False 2514 2515 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2516 2517 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2518 return self.expression( 2519 exp.Cluster, 2520 expressions=( 2521 self._parse_wrapped_csv(self._parse_ordered) 2522 if wrapped 2523 else self._parse_csv(self._parse_ordered) 2524 ), 2525 ) 2526 2527 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2528 self._match_text_seq("BY") 2529 2530 self._match_l_paren() 2531 expressions = self._parse_csv(self._parse_column) 2532 self._match_r_paren() 2533 2534 if self._match_text_seq("SORTED", "BY"): 2535 self._match_l_paren() 2536 sorted_by = self._parse_csv(self._parse_ordered) 2537 self._match_r_paren() 2538 else: 2539 sorted_by = None 2540 2541 self._match(TokenType.INTO) 2542 buckets = self._parse_number() 2543 self._match_text_seq("BUCKETS") 2544 2545 return self.expression( 2546 exp.ClusteredByProperty, 2547 expressions=expressions, 2548 sorted_by=sorted_by, 2549 buckets=buckets, 2550 ) 2551 2552 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2553 if not self._match_text_seq("GRANTS"): 2554 self._retreat(self._index - 1) 2555 return None 2556 2557 return self.expression(exp.CopyGrantsProperty) 2558 2559 def _parse_freespace(self) -> exp.FreespaceProperty: 2560 self._match(TokenType.EQ) 2561 return self.expression( 2562 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2563 ) 2564 2565 def _parse_mergeblockratio( 2566 self, no: bool = False, default: bool = False 2567 ) -> exp.MergeBlockRatioProperty: 2568 if self._match(TokenType.EQ): 2569 return self.expression( 2570 exp.MergeBlockRatioProperty, 2571 this=self._parse_number(), 2572 percent=self._match(TokenType.PERCENT), 2573 ) 2574 2575 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2576 2577 def _parse_datablocksize( 2578 self, 2579 default: t.Optional[bool] = None, 2580 minimum: t.Optional[bool] = None, 2581 maximum: t.Optional[bool] = None, 2582 ) -> exp.DataBlocksizeProperty: 2583 self._match(TokenType.EQ) 2584 size = self._parse_number() 2585 2586 units = None 2587 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2588 units = self._prev.text 2589 2590 return self.expression( 2591 exp.DataBlocksizeProperty, 2592 size=size, 2593 units=units, 2594 default=default, 2595 minimum=minimum, 2596 maximum=maximum, 2597 ) 2598 2599 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2600 self._match(TokenType.EQ) 2601 always = self._match_text_seq("ALWAYS") 2602 manual = self._match_text_seq("MANUAL") 2603 never = self._match_text_seq("NEVER") 2604 default = self._match_text_seq("DEFAULT") 2605 2606 autotemp = None 2607 if self._match_text_seq("AUTOTEMP"): 2608 autotemp = self._parse_schema() 2609 2610 return self.expression( 2611 exp.BlockCompressionProperty, 2612 always=always, 2613 manual=manual, 2614 never=never, 2615 default=default, 2616 autotemp=autotemp, 2617 ) 2618 2619 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2620 index = self._index 2621 no = self._match_text_seq("NO") 2622 concurrent = self._match_text_seq("CONCURRENT") 2623 2624 if not self._match_text_seq("ISOLATED", "LOADING"): 2625 self._retreat(index) 2626 return None 2627 2628 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2629 return self.expression( 2630 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2631 ) 2632 2633 def _parse_locking(self) -> exp.LockingProperty: 2634 if self._match(TokenType.TABLE): 2635 kind = "TABLE" 2636 elif 
self._match(TokenType.VIEW): 2637 kind = "VIEW" 2638 elif self._match(TokenType.ROW): 2639 kind = "ROW" 2640 elif self._match_text_seq("DATABASE"): 2641 kind = "DATABASE" 2642 else: 2643 kind = None 2644 2645 if kind in ("DATABASE", "TABLE", "VIEW"): 2646 this = self._parse_table_parts() 2647 else: 2648 this = None 2649 2650 if self._match(TokenType.FOR): 2651 for_or_in = "FOR" 2652 elif self._match(TokenType.IN): 2653 for_or_in = "IN" 2654 else: 2655 for_or_in = None 2656 2657 if self._match_text_seq("ACCESS"): 2658 lock_type = "ACCESS" 2659 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2660 lock_type = "EXCLUSIVE" 2661 elif self._match_text_seq("SHARE"): 2662 lock_type = "SHARE" 2663 elif self._match_text_seq("READ"): 2664 lock_type = "READ" 2665 elif self._match_text_seq("WRITE"): 2666 lock_type = "WRITE" 2667 elif self._match_text_seq("CHECKSUM"): 2668 lock_type = "CHECKSUM" 2669 else: 2670 lock_type = None 2671 2672 override = self._match_text_seq("OVERRIDE") 2673 2674 return self.expression( 2675 exp.LockingProperty, 2676 this=this, 2677 kind=kind, 2678 for_or_in=for_or_in, 2679 lock_type=lock_type, 2680 override=override, 2681 ) 2682 2683 def _parse_partition_by(self) -> t.List[exp.Expression]: 2684 if self._match(TokenType.PARTITION_BY): 2685 return self._parse_csv(self._parse_assignment) 2686 return [] 2687 2688 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2689 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2690 if self._match_text_seq("MINVALUE"): 2691 return exp.var("MINVALUE") 2692 if self._match_text_seq("MAXVALUE"): 2693 return exp.var("MAXVALUE") 2694 return self._parse_bitwise() 2695 2696 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2697 expression = None 2698 from_expressions = None 2699 to_expressions = None 2700 2701 if self._match(TokenType.IN): 2702 this = self._parse_wrapped_csv(self._parse_bitwise) 2703 elif self._match(TokenType.FROM): 2704 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2705 self._match_text_seq("TO") 2706 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2707 elif self._match_text_seq("WITH", "(", "MODULUS"): 2708 this = self._parse_number() 2709 self._match_text_seq(",", "REMAINDER") 2710 expression = self._parse_number() 2711 self._match_r_paren() 2712 else: 2713 self.raise_error("Failed to parse partition bound spec.") 2714 2715 return self.expression( 2716 exp.PartitionBoundSpec, 2717 this=this, 2718 expression=expression, 2719 from_expressions=from_expressions, 2720 to_expressions=to_expressions, 2721 ) 2722 2723 # https://www.postgresql.org/docs/current/sql-createtable.html 2724 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2725 if not self._match_text_seq("OF"): 2726 self._retreat(self._index - 1) 2727 return None 2728 2729 this = self._parse_table(schema=True) 2730 2731 if self._match(TokenType.DEFAULT): 2732 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2733 elif self._match_text_seq("FOR", "VALUES"): 2734 expression = self._parse_partition_bound_spec() 2735 else: 2736 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2737 2738 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2739 2740 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2741 self._match(TokenType.EQ) 2742 return self.expression( 2743 exp.PartitionedByProperty, 2744 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2745 ) 2746 2747 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2748 if self._match_text_seq("AND", "STATISTICS"): 2749 statistics = True 2750 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2751 statistics = False 2752 else: 2753 statistics = None 2754 2755 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2756 2757 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2758 if self._match_text_seq("SQL"): 2759 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2760 return None 2761 2762 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2763 if self._match_text_seq("SQL", "DATA"): 2764 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2765 return None 2766 2767 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2768 if self._match_text_seq("PRIMARY", "INDEX"): 2769 return exp.NoPrimaryIndexProperty() 2770 if self._match_text_seq("SQL"): 2771 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2772 return None 2773 2774 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2775 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2776 return exp.OnCommitProperty() 2777 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2778 return exp.OnCommitProperty(delete=True) 2779 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2780 2781 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2782 if self._match_text_seq("SQL", "DATA"): 2783 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2784 return None 2785 2786 def _parse_distkey(self) -> exp.DistKeyProperty: 2787 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2788 2789 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2790 table = self._parse_table(schema=True) 2791 2792 options = [] 2793 while self._match_texts(("INCLUDING", "EXCLUDING")): 2794 this = self._prev.text.upper() 2795 2796 id_var = self._parse_id_var() 2797 if not id_var: 2798 return None 2799 2800 options.append( 2801 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2802 ) 2803 2804 return self.expression(exp.LikeProperty, this=table, expressions=options) 2805 2806 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2807 return self.expression( 2808 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2809 ) 2810 2811 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2812 self._match(TokenType.EQ) 2813 return self.expression( 2814 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2815 ) 2816 2817 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2818 self._match_text_seq("WITH", "CONNECTION") 2819 return self.expression( 2820 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2821 ) 2822 2823 def _parse_returns(self) -> exp.ReturnsProperty: 2824 value: t.Optional[exp.Expression] 2825 null = None 2826 is_table = self._match(TokenType.TABLE) 2827 2828 if is_table: 2829 if self._match(TokenType.LT): 2830 value = self.expression( 2831 exp.Schema, 2832 this="TABLE", 2833 expressions=self._parse_csv(self._parse_struct_types), 2834 ) 2835 if not self._match(TokenType.GT): 2836 self.raise_error("Expecting >") 2837 else: 2838 value = self._parse_schema(exp.var("TABLE")) 2839 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
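            # This is the SQL-standard "RETURNS NULL ON NULL INPUT" function
            # characteristic: record the null-call behavior and leave the
            # return type unset.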
2840 null = True 2841 value = None 2842 else: 2843 value = self._parse_types() 2844 2845 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2846 2847 def _parse_describe(self) -> exp.Describe: 2848 kind = self._match_set(self.CREATABLES) and self._prev.text 2849 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2850 if self._match(TokenType.DOT): 2851 style = None 2852 self._retreat(self._index - 2) 2853 2854 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2855 2856 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2857 this = self._parse_statement() 2858 else: 2859 this = self._parse_table(schema=True) 2860 2861 properties = self._parse_properties() 2862 expressions = properties.expressions if properties else None 2863 partition = self._parse_partition() 2864 return self.expression( 2865 exp.Describe, 2866 this=this, 2867 style=style, 2868 kind=kind, 2869 expressions=expressions, 2870 partition=partition, 2871 format=format, 2872 ) 2873 2874 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2875 kind = self._prev.text.upper() 2876 expressions = [] 2877 2878 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2879 if self._match(TokenType.WHEN): 2880 expression = self._parse_disjunction() 2881 self._match(TokenType.THEN) 2882 else: 2883 expression = None 2884 2885 else_ = self._match(TokenType.ELSE) 2886 2887 if not self._match(TokenType.INTO): 2888 return None 2889 2890 return self.expression( 2891 exp.ConditionalInsert, 2892 this=self.expression( 2893 exp.Insert, 2894 this=self._parse_table(schema=True), 2895 expression=self._parse_derived_table_values(), 2896 ), 2897 expression=expression, 2898 else_=else_, 2899 ) 2900 2901 expression = parse_conditional_insert() 2902 while expression is not None: 2903 expressions.append(expression) 2904 expression = parse_conditional_insert() 2905 2906 return self.expression( 2907 exp.MultitableInserts, 2908 kind=kind, 2909 comments=comments, 2910 expressions=expressions, 2911 source=self._parse_table(), 2912 ) 2913 2914 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2915 comments = [] 2916 hint = self._parse_hint() 2917 overwrite = self._match(TokenType.OVERWRITE) 2918 ignore = self._match(TokenType.IGNORE) 2919 local = self._match_text_seq("LOCAL") 2920 alternative = None 2921 is_function = None 2922 2923 if self._match_text_seq("DIRECTORY"): 2924 this: t.Optional[exp.Expression] = self.expression( 2925 exp.Directory, 2926 this=self._parse_var_or_string(), 2927 local=local, 2928 row_format=self._parse_row_format(match_row=True), 2929 ) 2930 else: 2931 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2932 comments += ensure_list(self._prev_comments) 2933 return self._parse_multitable_inserts(comments) 2934 2935 if self._match(TokenType.OR): 2936 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2937 2938 self._match(TokenType.INTO) 2939 comments += ensure_list(self._prev_comments) 2940 self._match(TokenType.TABLE) 2941 is_function = self._match(TokenType.FUNCTION) 2942 2943 this = ( 2944 self._parse_table(schema=True, parse_partition=True) 2945 if not is_function 2946 else self._parse_function() 2947 ) 2948 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2949 this.set("alias", self._parse_table_alias()) 2950 2951 returning = self._parse_returning() 2952 2953 return self.expression( 2954 
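            # Every keyword argument below is optional: each helper returns None (or
            # False) when its clause is absent, so a bare INSERT INTO t VALUES (...)
            # still builds a valid exp.Insert.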
exp.Insert, 2955 comments=comments, 2956 hint=hint, 2957 is_function=is_function, 2958 this=this, 2959 stored=self._match_text_seq("STORED") and self._parse_stored(), 2960 by_name=self._match_text_seq("BY", "NAME"), 2961 exists=self._parse_exists(), 2962 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2963 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2964 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2965 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2966 conflict=self._parse_on_conflict(), 2967 returning=returning or self._parse_returning(), 2968 overwrite=overwrite, 2969 alternative=alternative, 2970 ignore=ignore, 2971 source=self._match(TokenType.TABLE) and self._parse_table(), 2972 ) 2973 2974 def _parse_kill(self) -> exp.Kill: 2975 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2976 2977 return self.expression( 2978 exp.Kill, 2979 this=self._parse_primary(), 2980 kind=kind, 2981 ) 2982 2983 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2984 conflict = self._match_text_seq("ON", "CONFLICT") 2985 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2986 2987 if not conflict and not duplicate: 2988 return None 2989 2990 conflict_keys = None 2991 constraint = None 2992 2993 if conflict: 2994 if self._match_text_seq("ON", "CONSTRAINT"): 2995 constraint = self._parse_id_var() 2996 elif self._match(TokenType.L_PAREN): 2997 conflict_keys = self._parse_csv(self._parse_id_var) 2998 self._match_r_paren() 2999 3000 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3001 if self._prev.token_type == TokenType.UPDATE: 3002 self._match(TokenType.SET) 3003 expressions = self._parse_csv(self._parse_equality) 3004 else: 3005 expressions = None 3006 3007 return self.expression( 3008 exp.OnConflict, 3009 duplicate=duplicate, 3010 expressions=expressions, 3011 action=action, 3012 conflict_keys=conflict_keys, 3013 constraint=constraint, 3014 where=self._parse_where(), 3015 ) 3016 3017 def _parse_returning(self) -> t.Optional[exp.Returning]: 3018 if not self._match(TokenType.RETURNING): 3019 return None 3020 return self.expression( 3021 exp.Returning, 3022 expressions=self._parse_csv(self._parse_expression), 3023 into=self._match(TokenType.INTO) and self._parse_table_part(), 3024 ) 3025 3026 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3027 if not self._match(TokenType.FORMAT): 3028 return None 3029 return self._parse_row_format() 3030 3031 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3032 index = self._index 3033 with_ = with_ or self._match_text_seq("WITH") 3034 3035 if not self._match(TokenType.SERDE_PROPERTIES): 3036 self._retreat(index) 3037 return None 3038 return self.expression( 3039 exp.SerdeProperties, 3040 **{ # type: ignore 3041 "expressions": self._parse_wrapped_properties(), 3042 "with": with_, 3043 }, 3044 ) 3045 3046 def _parse_row_format( 3047 self, match_row: bool = False 3048 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3049 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3050 return None 3051 3052 if self._match_text_seq("SERDE"): 3053 this = self._parse_string() 3054 3055 serde_properties = self._parse_serde_properties() 3056 3057 return self.expression( 3058 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3059 ) 3060 3061 self._match_text_seq("DELIMITED") 3062 3063 kwargs = {} 3064 3065 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3066 kwargs["fields"] = self._parse_string() 3067 if self._match_text_seq("ESCAPED", "BY"): 3068 kwargs["escaped"] = self._parse_string() 3069 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3070 kwargs["collection_items"] = self._parse_string() 3071 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3072 kwargs["map_keys"] = self._parse_string() 3073 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3074 kwargs["lines"] = self._parse_string() 3075 if self._match_text_seq("NULL", "DEFINED", "AS"): 3076 kwargs["null"] = self._parse_string() 3077 3078 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3079 3080 def _parse_load(self) -> exp.LoadData | exp.Command: 3081 if self._match_text_seq("DATA"): 3082 local = self._match_text_seq("LOCAL") 3083 self._match_text_seq("INPATH") 3084 inpath = self._parse_string() 3085 overwrite = self._match(TokenType.OVERWRITE) 3086 self._match_pair(TokenType.INTO, TokenType.TABLE) 3087 3088 return self.expression( 3089 exp.LoadData, 3090 this=self._parse_table(schema=True), 3091 local=local, 3092 overwrite=overwrite, 3093 inpath=inpath, 3094 partition=self._parse_partition(), 3095 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3096 serde=self._match_text_seq("SERDE") and self._parse_string(), 3097 ) 3098 return self._parse_as_command(self._prev) 3099 3100 def _parse_delete(self) -> exp.Delete: 3101 # This handles MySQL's "Multiple-Table Syntax" 3102 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3103 tables = None 3104 if not self._match(TokenType.FROM, advance=False): 3105 tables = self._parse_csv(self._parse_table) or None 3106 3107 returning = self._parse_returning() 3108 3109 return self.expression( 3110 exp.Delete, 3111 tables=tables, 3112 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3113 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3114 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3115 where=self._parse_where(), 3116 returning=returning or self._parse_returning(), 3117 limit=self._parse_limit(), 3118 ) 3119 3120 def _parse_update(self) -> exp.Update: 3121 kwargs: t.Dict[str, t.Any] = { 3122 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3123 } 3124 while self._curr: 3125 if self._match(TokenType.SET): 3126 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3127 elif self._match(TokenType.RETURNING, advance=False): 3128 kwargs["returning"] = self._parse_returning() 3129 elif self._match(TokenType.FROM, advance=False): 3130 kwargs["from"] = self._parse_from(joins=True) 3131 elif self._match(TokenType.WHERE, advance=False): 3132 kwargs["where"] = self._parse_where() 3133 elif self._match(TokenType.ORDER_BY, advance=False): 3134 kwargs["order"] = self._parse_order() 3135 elif self._match(TokenType.LIMIT, advance=False): 3136 kwargs["limit"] = self._parse_limit() 3137 else: 3138 break 3139 3140 return self.expression(exp.Update, **kwargs) 3141 3142 def _parse_use(self) -> exp.Use: 3143 return self.expression( 3144 exp.Use, 3145 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3146 this=self._parse_table(schema=False), 3147 ) 3148 3149 def _parse_uncache(self) -> exp.Uncache: 3150 if not self._match(TokenType.TABLE): 3151 self.raise_error("Expecting TABLE after 
UNCACHE") 3152 3153 return self.expression( 3154 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3155 ) 3156 3157 def _parse_cache(self) -> exp.Cache: 3158 lazy = self._match_text_seq("LAZY") 3159 self._match(TokenType.TABLE) 3160 table = self._parse_table(schema=True) 3161 3162 options = [] 3163 if self._match_text_seq("OPTIONS"): 3164 self._match_l_paren() 3165 k = self._parse_string() 3166 self._match(TokenType.EQ) 3167 v = self._parse_string() 3168 options = [k, v] 3169 self._match_r_paren() 3170 3171 self._match(TokenType.ALIAS) 3172 return self.expression( 3173 exp.Cache, 3174 this=table, 3175 lazy=lazy, 3176 options=options, 3177 expression=self._parse_select(nested=True), 3178 ) 3179 3180 def _parse_partition(self) -> t.Optional[exp.Partition]: 3181 if not self._match_texts(self.PARTITION_KEYWORDS): 3182 return None 3183 3184 return self.expression( 3185 exp.Partition, 3186 subpartition=self._prev.text.upper() == "SUBPARTITION", 3187 expressions=self._parse_wrapped_csv(self._parse_assignment), 3188 ) 3189 3190 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3191 def _parse_value_expression() -> t.Optional[exp.Expression]: 3192 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3193 return exp.var(self._prev.text.upper()) 3194 return self._parse_expression() 3195 3196 if self._match(TokenType.L_PAREN): 3197 expressions = self._parse_csv(_parse_value_expression) 3198 self._match_r_paren() 3199 return self.expression(exp.Tuple, expressions=expressions) 3200 3201 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3202 expression = self._parse_expression() 3203 if expression: 3204 return self.expression(exp.Tuple, expressions=[expression]) 3205 return None 3206 3207 def _parse_projections(self) -> t.List[exp.Expression]: 3208 return self._parse_expressions() 3209 3210 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3211 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3212 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3213 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3214 ) 3215 elif self._match(TokenType.FROM): 3216 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3217 # Support parentheses for duckdb FROM-first syntax 3218 select = self._parse_select(from_=from_) 3219 if select: 3220 if not select.args.get("from"): 3221 select.set("from", from_) 3222 this = select 3223 else: 3224 this = exp.select("*").from_(t.cast(exp.From, from_)) 3225 else: 3226 this = ( 3227 self._parse_table(consume_pipe=True) 3228 if table 3229 else self._parse_select(nested=True, parse_set_operation=False) 3230 ) 3231 3232 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3233 # in case a modifier (e.g. 
join) is following 3234 if table and isinstance(this, exp.Values) and this.alias: 3235 alias = this.args["alias"].pop() 3236 this = exp.Table(this=this, alias=alias) 3237 3238 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3239 3240 return this 3241 3242 def _parse_select( 3243 self, 3244 nested: bool = False, 3245 table: bool = False, 3246 parse_subquery_alias: bool = True, 3247 parse_set_operation: bool = True, 3248 consume_pipe: bool = True, 3249 from_: t.Optional[exp.From] = None, 3250 ) -> t.Optional[exp.Expression]: 3251 query = self._parse_select_query( 3252 nested=nested, 3253 table=table, 3254 parse_subquery_alias=parse_subquery_alias, 3255 parse_set_operation=parse_set_operation, 3256 ) 3257 3258 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3259 if not query and from_: 3260 query = exp.select("*").from_(from_) 3261 if isinstance(query, exp.Query): 3262 query = self._parse_pipe_syntax_query(query) 3263 query = query.subquery(copy=False) if query and table else query 3264 3265 return query 3266 3267 def _parse_select_query( 3268 self, 3269 nested: bool = False, 3270 table: bool = False, 3271 parse_subquery_alias: bool = True, 3272 parse_set_operation: bool = True, 3273 ) -> t.Optional[exp.Expression]: 3274 cte = self._parse_with() 3275 3276 if cte: 3277 this = self._parse_statement() 3278 3279 if not this: 3280 self.raise_error("Failed to parse any statement following CTE") 3281 return cte 3282 3283 if "with" in this.arg_types: 3284 this.set("with", cte) 3285 else: 3286 self.raise_error(f"{this.key} does not support CTE") 3287 this = cte 3288 3289 return this 3290 3291 # duckdb supports leading with FROM x 3292 from_ = ( 3293 self._parse_from(consume_pipe=True) 3294 if self._match(TokenType.FROM, advance=False) 3295 else None 3296 ) 3297 3298 if self._match(TokenType.SELECT): 3299 comments = self._prev_comments 3300 3301 hint = self._parse_hint() 3302 3303 if self._next and not self._next.token_type == TokenType.DOT: 3304 all_ = self._match(TokenType.ALL) 3305 distinct = self._match_set(self.DISTINCT_TOKENS) 3306 else: 3307 all_, distinct = None, None 3308 3309 kind = ( 3310 self._match(TokenType.ALIAS) 3311 and self._match_texts(("STRUCT", "VALUE")) 3312 and self._prev.text.upper() 3313 ) 3314 3315 if distinct: 3316 distinct = self.expression( 3317 exp.Distinct, 3318 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3319 ) 3320 3321 if all_ and distinct: 3322 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3323 3324 operation_modifiers = [] 3325 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3326 operation_modifiers.append(exp.var(self._prev.text.upper())) 3327 3328 limit = self._parse_limit(top=True) 3329 projections = self._parse_projections() 3330 3331 this = self.expression( 3332 exp.Select, 3333 kind=kind, 3334 hint=hint, 3335 distinct=distinct, 3336 expressions=projections, 3337 limit=limit, 3338 operation_modifiers=operation_modifiers or None, 3339 ) 3340 this.comments = comments 3341 3342 into = self._parse_into() 3343 if into: 3344 this.set("into", into) 3345 3346 if not from_: 3347 from_ = self._parse_from() 3348 3349 if from_: 3350 this.set("from", from_) 3351 3352 this = self._parse_query_modifiers(this) 3353 elif (table or nested) and self._match(TokenType.L_PAREN): 3354 this = self._parse_wrapped_select(table=table) 3355 3356 # We return early here so that the UNION isn't attached to the subquery by the 3357 # following call to _parse_set_operations, but 
instead becomes the parent node 3358 self._match_r_paren() 3359 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3360 elif self._match(TokenType.VALUES, advance=False): 3361 this = self._parse_derived_table_values() 3362 elif from_: 3363 this = exp.select("*").from_(from_.this, copy=False) 3364 elif self._match(TokenType.SUMMARIZE): 3365 table = self._match(TokenType.TABLE) 3366 this = self._parse_select() or self._parse_string() or self._parse_table() 3367 return self.expression(exp.Summarize, this=this, table=table) 3368 elif self._match(TokenType.DESCRIBE): 3369 this = self._parse_describe() 3370 elif self._match_text_seq("STREAM"): 3371 this = self._parse_function() 3372 if this: 3373 this = self.expression(exp.Stream, this=this) 3374 else: 3375 self._retreat(self._index - 1) 3376 else: 3377 this = None 3378 3379 return self._parse_set_operations(this) if parse_set_operation else this 3380 3381 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3382 self._match_text_seq("SEARCH") 3383 3384 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3385 3386 if not kind: 3387 return None 3388 3389 self._match_text_seq("FIRST", "BY") 3390 3391 return self.expression( 3392 exp.RecursiveWithSearch, 3393 kind=kind, 3394 this=self._parse_id_var(), 3395 expression=self._match_text_seq("SET") and self._parse_id_var(), 3396 using=self._match_text_seq("USING") and self._parse_id_var(), 3397 ) 3398 3399 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3400 if not skip_with_token and not self._match(TokenType.WITH): 3401 return None 3402 3403 comments = self._prev_comments 3404 recursive = self._match(TokenType.RECURSIVE) 3405 3406 last_comments = None 3407 expressions = [] 3408 while True: 3409 cte = self._parse_cte() 3410 if isinstance(cte, exp.CTE): 3411 expressions.append(cte) 3412 if last_comments: 3413 cte.add_comments(last_comments) 3414 3415 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3416 break 3417 else: 3418 self._match(TokenType.WITH) 3419 3420 last_comments = self._prev_comments 3421 3422 return self.expression( 3423 exp.With, 3424 comments=comments, 3425 expressions=expressions, 3426 recursive=recursive, 3427 search=self._parse_recursive_with_search(), 3428 ) 3429 3430 def _parse_cte(self) -> t.Optional[exp.CTE]: 3431 index = self._index 3432 3433 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3434 if not alias or not alias.this: 3435 self.raise_error("Expected CTE to have alias") 3436 3437 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3438 self._retreat(index) 3439 return None 3440 3441 comments = self._prev_comments 3442 3443 if self._match_text_seq("NOT", "MATERIALIZED"): 3444 materialized = False 3445 elif self._match_text_seq("MATERIALIZED"): 3446 materialized = True 3447 else: 3448 materialized = None 3449 3450 cte = self.expression( 3451 exp.CTE, 3452 this=self._parse_wrapped(self._parse_statement), 3453 alias=alias, 3454 materialized=materialized, 3455 comments=comments, 3456 ) 3457 3458 values = cte.this 3459 if isinstance(values, exp.Values): 3460 if values.alias: 3461 cte.set("this", exp.select("*").from_(values)) 3462 else: 3463 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3464 3465 return cte 3466 3467 def _parse_table_alias( 3468 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3469 ) -> t.Optional[exp.TableAlias]: 3470 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3471 # so this section tries to parse the clause version and if it fails, it treats the token 3472 # as an identifier (alias) 3473 if self._can_parse_limit_or_offset(): 3474 return None 3475 3476 any_token = self._match(TokenType.ALIAS) 3477 alias = ( 3478 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3479 or self._parse_string_as_identifier() 3480 ) 3481 3482 index = self._index 3483 if self._match(TokenType.L_PAREN): 3484 columns = self._parse_csv(self._parse_function_parameter) 3485 self._match_r_paren() if columns else self._retreat(index) 3486 else: 3487 columns = None 3488 3489 if not alias and not columns: 3490 return None 3491 3492 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3493 3494 # We bubble up comments from the Identifier to the TableAlias 3495 if isinstance(alias, exp.Identifier): 3496 table_alias.add_comments(alias.pop_comments()) 3497 3498 return table_alias 3499 3500 def _parse_subquery( 3501 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3502 ) -> t.Optional[exp.Subquery]: 3503 if not this: 3504 return None 3505 3506 return self.expression( 3507 exp.Subquery, 3508 this=this, 3509 pivots=self._parse_pivots(), 3510 alias=self._parse_table_alias() if parse_alias else None, 3511 sample=self._parse_table_sample(), 3512 ) 3513 3514 def _implicit_unnests_to_explicit(self, this: E) -> E: 3515 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3516 3517 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3518 for i, join in enumerate(this.args.get("joins") or []): 3519 table = join.this 3520 normalized_table = table.copy() 3521 normalized_table.meta["maybe_column"] = True 3522 normalized_table = _norm(normalized_table, dialect=self.dialect) 3523 3524 if isinstance(table, exp.Table) and not join.args.get("on"): 3525 if normalized_table.parts[0].name in refs: 3526 table_as_column = table.to_column() 3527 unnest = exp.Unnest(expressions=[table_as_column]) 3528 3529 # Table.to_column creates a parent Alias node that we want to convert to 3530 # a TableAlias and attach to the Unnest, so it matches the parser's output 3531 if isinstance(table.args.get("alias"), exp.TableAlias): 3532 table_as_column.replace(table_as_column.this) 3533 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3534 3535 table.replace(unnest) 3536 3537 refs.add(normalized_table.alias_or_name) 3538 3539 return this 3540 3541 @t.overload 3542 def _parse_query_modifiers(self, this: E) -> E: ... 3543 3544 @t.overload 3545 def _parse_query_modifiers(self, this: None) -> None: ... 
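# Example — each entry in QUERY_MODIFIER_PARSERS attaches its clause under the
# matching arg key of the query node, and a repeated clause triggers the
# "Found multiple ... clauses" error raised below. A minimal doctest-style
# sketch, assuming the public `sqlglot.parse_one` helper:
#
#     >>> import sqlglot
#     >>> q = sqlglot.parse_one("SELECT a FROM t WHERE a > 1 ORDER BY a LIMIT 10")
#     >>> [k for k in ("where", "order", "limit") if q.args.get(k)]
#     ['where', 'order', 'limit']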
3546 3547 def _parse_query_modifiers(self, this): 3548 if isinstance(this, self.MODIFIABLES): 3549 for join in self._parse_joins(): 3550 this.append("joins", join) 3551 for lateral in iter(self._parse_lateral, None): 3552 this.append("laterals", lateral) 3553 3554 while True: 3555 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3556 modifier_token = self._curr 3557 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3558 key, expression = parser(self) 3559 3560 if expression: 3561 if this.args.get(key): 3562 self.raise_error( 3563 f"Found multiple '{modifier_token.text.upper()}' clauses", 3564 token=modifier_token, 3565 ) 3566 3567 this.set(key, expression) 3568 if key == "limit": 3569 offset = expression.args.pop("offset", None) 3570 3571 if offset: 3572 offset = exp.Offset(expression=offset) 3573 this.set("offset", offset) 3574 3575 limit_by_expressions = expression.expressions 3576 expression.set("expressions", None) 3577 offset.set("expressions", limit_by_expressions) 3578 continue 3579 break 3580 3581 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3582 this = self._implicit_unnests_to_explicit(this) 3583 3584 return this 3585 3586 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3587 start = self._curr 3588 while self._curr: 3589 self._advance() 3590 3591 end = self._tokens[self._index - 1] 3592 return exp.Hint(expressions=[self._find_sql(start, end)]) 3593 3594 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3595 return self._parse_function_call() 3596 3597 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3598 start_index = self._index 3599 should_fallback_to_string = False 3600 3601 hints = [] 3602 try: 3603 for hint in iter( 3604 lambda: self._parse_csv( 3605 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3606 ), 3607 [], 3608 ): 3609 hints.extend(hint) 3610 except ParseError: 3611 should_fallback_to_string = True 3612 3613 if should_fallback_to_string or self._curr: 3614 self._retreat(start_index) 3615 return self._parse_hint_fallback_to_string() 3616 3617 return self.expression(exp.Hint, expressions=hints) 3618 3619 def _parse_hint(self) -> t.Optional[exp.Hint]: 3620 if self._match(TokenType.HINT) and self._prev_comments: 3621 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3622 3623 return None 3624 3625 def _parse_into(self) -> t.Optional[exp.Into]: 3626 if not self._match(TokenType.INTO): 3627 return None 3628 3629 temp = self._match(TokenType.TEMPORARY) 3630 unlogged = self._match_text_seq("UNLOGGED") 3631 self._match(TokenType.TABLE) 3632 3633 return self.expression( 3634 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3635 ) 3636 3637 def _parse_from( 3638 self, 3639 joins: bool = False, 3640 skip_from_token: bool = False, 3641 consume_pipe: bool = False, 3642 ) -> t.Optional[exp.From]: 3643 if not skip_from_token and not self._match(TokenType.FROM): 3644 return None 3645 3646 return self.expression( 3647 exp.From, 3648 comments=self._prev_comments, 3649 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3650 ) 3651 3652 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3653 return self.expression( 3654 exp.MatchRecognizeMeasure, 3655 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3656 this=self._parse_expression(), 3657 ) 3658 3659 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3660 if not 
self._match(TokenType.MATCH_RECOGNIZE): 3661 return None 3662 3663 self._match_l_paren() 3664 3665 partition = self._parse_partition_by() 3666 order = self._parse_order() 3667 3668 measures = ( 3669 self._parse_csv(self._parse_match_recognize_measure) 3670 if self._match_text_seq("MEASURES") 3671 else None 3672 ) 3673 3674 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3675 rows = exp.var("ONE ROW PER MATCH") 3676 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3677 text = "ALL ROWS PER MATCH" 3678 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3679 text += " SHOW EMPTY MATCHES" 3680 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3681 text += " OMIT EMPTY MATCHES" 3682 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3683 text += " WITH UNMATCHED ROWS" 3684 rows = exp.var(text) 3685 else: 3686 rows = None 3687 3688 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3689 text = "AFTER MATCH SKIP" 3690 if self._match_text_seq("PAST", "LAST", "ROW"): 3691 text += " PAST LAST ROW" 3692 elif self._match_text_seq("TO", "NEXT", "ROW"): 3693 text += " TO NEXT ROW" 3694 elif self._match_text_seq("TO", "FIRST"): 3695 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3696 elif self._match_text_seq("TO", "LAST"): 3697 text += f" TO LAST {self._advance_any().text}" # type: ignore 3698 after = exp.var(text) 3699 else: 3700 after = None 3701 3702 if self._match_text_seq("PATTERN"): 3703 self._match_l_paren() 3704 3705 if not self._curr: 3706 self.raise_error("Expecting )", self._curr) 3707 3708 paren = 1 3709 start = self._curr 3710 3711 while self._curr and paren > 0: 3712 if self._curr.token_type == TokenType.L_PAREN: 3713 paren += 1 3714 if self._curr.token_type == TokenType.R_PAREN: 3715 paren -= 1 3716 3717 end = self._prev 3718 self._advance() 3719 3720 if paren > 0: 3721 self.raise_error("Expecting )", self._curr) 3722 3723 pattern = exp.var(self._find_sql(start, end)) 3724 else: 3725 pattern = None 3726 3727 define = ( 3728 self._parse_csv(self._parse_name_as_expression) 3729 if self._match_text_seq("DEFINE") 3730 else None 3731 ) 3732 3733 self._match_r_paren() 3734 3735 return self.expression( 3736 exp.MatchRecognize, 3737 partition_by=partition, 3738 order=order, 3739 measures=measures, 3740 rows=rows, 3741 after=after, 3742 pattern=pattern, 3743 define=define, 3744 alias=self._parse_table_alias(), 3745 ) 3746 3747 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3748 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3749 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3750 cross_apply = False 3751 3752 if cross_apply is not None: 3753 this = self._parse_select(table=True) 3754 view = None 3755 outer = None 3756 elif self._match(TokenType.LATERAL): 3757 this = self._parse_select(table=True) 3758 view = self._match(TokenType.VIEW) 3759 outer = self._match(TokenType.OUTER) 3760 else: 3761 return None 3762 3763 if not this: 3764 this = ( 3765 self._parse_unnest() 3766 or self._parse_function() 3767 or self._parse_id_var(any_token=False) 3768 ) 3769 3770 while self._match(TokenType.DOT): 3771 this = exp.Dot( 3772 this=this, 3773 expression=self._parse_function() or self._parse_id_var(any_token=False), 3774 ) 3775 3776 ordinality: t.Optional[bool] = None 3777 3778 if view: 3779 table = self._parse_id_var(any_token=False) 3780 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3781 table_alias: t.Optional[exp.TableAlias] = self.expression( 3782 exp.TableAlias, 
this=table, columns=columns 3783 ) 3784 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3785 # We move the alias from the lateral's child node to the lateral itself 3786 table_alias = this.args["alias"].pop() 3787 else: 3788 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3789 table_alias = self._parse_table_alias() 3790 3791 return self.expression( 3792 exp.Lateral, 3793 this=this, 3794 view=view, 3795 outer=outer, 3796 alias=table_alias, 3797 cross_apply=cross_apply, 3798 ordinality=ordinality, 3799 ) 3800 3801 def _parse_join_parts( 3802 self, 3803 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3804 return ( 3805 self._match_set(self.JOIN_METHODS) and self._prev, 3806 self._match_set(self.JOIN_SIDES) and self._prev, 3807 self._match_set(self.JOIN_KINDS) and self._prev, 3808 ) 3809 3810 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3811 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3812 this = self._parse_column() 3813 if isinstance(this, exp.Column): 3814 return this.this 3815 return this 3816 3817 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3818 3819 def _parse_join( 3820 self, skip_join_token: bool = False, parse_bracket: bool = False 3821 ) -> t.Optional[exp.Join]: 3822 if self._match(TokenType.COMMA): 3823 table = self._try_parse(self._parse_table) 3824 cross_join = self.expression(exp.Join, this=table) if table else None 3825 3826 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3827 cross_join.set("kind", "CROSS") 3828 3829 return cross_join 3830 3831 index = self._index 3832 method, side, kind = self._parse_join_parts() 3833 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3834 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3835 join_comments = self._prev_comments 3836 3837 if not skip_join_token and not join: 3838 self._retreat(index) 3839 kind = None 3840 method = None 3841 side = None 3842 3843 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3844 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3845 3846 if not skip_join_token and not join and not outer_apply and not cross_apply: 3847 return None 3848 3849 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3850 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3851 kwargs["expressions"] = self._parse_csv( 3852 lambda: self._parse_table(parse_bracket=parse_bracket) 3853 ) 3854 3855 if method: 3856 kwargs["method"] = method.text 3857 if side: 3858 kwargs["side"] = side.text 3859 if kind: 3860 kwargs["kind"] = kind.text 3861 if hint: 3862 kwargs["hint"] = hint 3863 3864 if self._match(TokenType.MATCH_CONDITION): 3865 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3866 3867 if self._match(TokenType.ON): 3868 kwargs["on"] = self._parse_assignment() 3869 elif self._match(TokenType.USING): 3870 kwargs["using"] = self._parse_using_identifiers() 3871 elif ( 3872 not method 3873 and not (outer_apply or cross_apply) 3874 and not isinstance(kwargs["this"], exp.Unnest) 3875 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3876 ): 3877 index = self._index 3878 joins: t.Optional[list] = list(self._parse_joins()) 3879 3880 if joins and self._match(TokenType.ON): 3881 kwargs["on"] = self._parse_assignment() 3882 elif joins and self._match(TokenType.USING): 3883 kwargs["using"] = 
self._parse_using_identifiers() 3884 else: 3885 joins = None 3886 self._retreat(index) 3887 3888 kwargs["this"].set("joins", joins if joins else None) 3889 3890 kwargs["pivots"] = self._parse_pivots() 3891 3892 comments = [c for token in (method, side, kind) if token for c in token.comments] 3893 comments = (join_comments or []) + comments 3894 3895 if ( 3896 self.ADD_JOIN_ON_TRUE 3897 and not kwargs.get("on") 3898 and not kwargs.get("using") 3899 and not kwargs.get("method") 3900 and kwargs.get("kind") in (None, "INNER", "OUTER") 3901 ): 3902 kwargs["on"] = exp.true() 3903 3904 return self.expression(exp.Join, comments=comments, **kwargs) 3905 3906 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3907 this = self._parse_assignment() 3908 3909 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3910 return this 3911 3912 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3913 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3914 3915 return this 3916 3917 def _parse_index_params(self) -> exp.IndexParameters: 3918 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3919 3920 if self._match(TokenType.L_PAREN, advance=False): 3921 columns = self._parse_wrapped_csv(self._parse_with_operator) 3922 else: 3923 columns = None 3924 3925 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3926 partition_by = self._parse_partition_by() 3927 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3928 tablespace = ( 3929 self._parse_var(any_token=True) 3930 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3931 else None 3932 ) 3933 where = self._parse_where() 3934 3935 on = self._parse_field() if self._match(TokenType.ON) else None 3936 3937 return self.expression( 3938 exp.IndexParameters, 3939 using=using, 3940 columns=columns, 3941 include=include, 3942 partition_by=partition_by, 3943 where=where, 3944 with_storage=with_storage, 3945 tablespace=tablespace, 3946 on=on, 3947 ) 3948 3949 def _parse_index( 3950 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3951 ) -> t.Optional[exp.Index]: 3952 if index or anonymous: 3953 unique = None 3954 primary = None 3955 amp = None 3956 3957 self._match(TokenType.ON) 3958 self._match(TokenType.TABLE) # hive 3959 table = self._parse_table_parts(schema=True) 3960 else: 3961 unique = self._match(TokenType.UNIQUE) 3962 primary = self._match_text_seq("PRIMARY") 3963 amp = self._match_text_seq("AMP") 3964 3965 if not self._match(TokenType.INDEX): 3966 return None 3967 3968 index = self._parse_id_var() 3969 table = None 3970 3971 params = self._parse_index_params() 3972 3973 return self.expression( 3974 exp.Index, 3975 this=index, 3976 table=table, 3977 unique=unique, 3978 primary=primary, 3979 amp=amp, 3980 params=params, 3981 ) 3982 3983 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3984 hints: t.List[exp.Expression] = [] 3985 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3986 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3987 hints.append( 3988 self.expression( 3989 exp.WithTableHint, 3990 expressions=self._parse_csv( 3991 lambda: self._parse_function() or self._parse_var(any_token=True) 3992 ), 3993 ) 3994 ) 3995 self._match_r_paren() 3996 else: 3997 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3998 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3999 hint = 
exp.IndexTableHint(this=self._prev.text.upper()) 4000 4001 self._match_set((TokenType.INDEX, TokenType.KEY)) 4002 if self._match(TokenType.FOR): 4003 hint.set("target", self._advance_any() and self._prev.text.upper()) 4004 4005 hint.set("expressions", self._parse_wrapped_id_vars()) 4006 hints.append(hint) 4007 4008 return hints or None 4009 4010 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4011 return ( 4012 (not schema and self._parse_function(optional_parens=False)) 4013 or self._parse_id_var(any_token=False) 4014 or self._parse_string_as_identifier() 4015 or self._parse_placeholder() 4016 ) 4017 4018 def _parse_table_parts( 4019 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4020 ) -> exp.Table: 4021 catalog = None 4022 db = None 4023 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4024 4025 while self._match(TokenType.DOT): 4026 if catalog: 4027 # This allows nesting the table in arbitrarily many dot expressions if needed 4028 table = self.expression( 4029 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4030 ) 4031 else: 4032 catalog = db 4033 db = table 4034 # "" used for tsql FROM a..b case 4035 table = self._parse_table_part(schema=schema) or "" 4036 4037 if ( 4038 wildcard 4039 and self._is_connected() 4040 and (isinstance(table, exp.Identifier) or not table) 4041 and self._match(TokenType.STAR) 4042 ): 4043 if isinstance(table, exp.Identifier): 4044 table.args["this"] += "*" 4045 else: 4046 table = exp.Identifier(this="*") 4047 4048 # We bubble up comments from the Identifier to the Table 4049 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4050 4051 if is_db_reference: 4052 catalog = db 4053 db = table 4054 table = None 4055 4056 if not table and not is_db_reference: 4057 self.raise_error(f"Expected table name but got {self._curr}") 4058 if not db and is_db_reference: 4059 self.raise_error(f"Expected database name but got {self._curr}") 4060 4061 table = self.expression( 4062 exp.Table, 4063 comments=comments, 4064 this=table, 4065 db=db, 4066 catalog=catalog, 4067 ) 4068 4069 changes = self._parse_changes() 4070 if changes: 4071 table.set("changes", changes) 4072 4073 at_before = self._parse_historical_data() 4074 if at_before: 4075 table.set("when", at_before) 4076 4077 pivots = self._parse_pivots() 4078 if pivots: 4079 table.set("pivots", pivots) 4080 4081 return table 4082 4083 def _parse_table( 4084 self, 4085 schema: bool = False, 4086 joins: bool = False, 4087 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4088 parse_bracket: bool = False, 4089 is_db_reference: bool = False, 4090 parse_partition: bool = False, 4091 consume_pipe: bool = False, 4092 ) -> t.Optional[exp.Expression]: 4093 lateral = self._parse_lateral() 4094 if lateral: 4095 return lateral 4096 4097 unnest = self._parse_unnest() 4098 if unnest: 4099 return unnest 4100 4101 values = self._parse_derived_table_values() 4102 if values: 4103 return values 4104 4105 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4106 if subquery: 4107 if not subquery.args.get("pivots"): 4108 subquery.set("pivots", self._parse_pivots()) 4109 return subquery 4110 4111 bracket = parse_bracket and self._parse_bracket(None) 4112 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4113 4114 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4115 self._parse_table 4116 ) 4117 rows_from = 
self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4118 4119 only = self._match(TokenType.ONLY) 4120 4121 this = t.cast( 4122 exp.Expression, 4123 bracket 4124 or rows_from 4125 or self._parse_bracket( 4126 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4127 ), 4128 ) 4129 4130 if only: 4131 this.set("only", only) 4132 4133 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4134 self._match_text_seq("*") 4135 4136 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4137 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4138 this.set("partition", self._parse_partition()) 4139 4140 if schema: 4141 return self._parse_schema(this=this) 4142 4143 version = self._parse_version() 4144 4145 if version: 4146 this.set("version", version) 4147 4148 if self.dialect.ALIAS_POST_TABLESAMPLE: 4149 this.set("sample", self._parse_table_sample()) 4150 4151 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4152 if alias: 4153 this.set("alias", alias) 4154 4155 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4156 return self.expression( 4157 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4158 ) 4159 4160 this.set("hints", self._parse_table_hints()) 4161 4162 if not this.args.get("pivots"): 4163 this.set("pivots", self._parse_pivots()) 4164 4165 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4166 this.set("sample", self._parse_table_sample()) 4167 4168 if joins: 4169 for join in self._parse_joins(): 4170 this.append("joins", join) 4171 4172 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4173 this.set("ordinality", True) 4174 this.set("alias", self._parse_table_alias()) 4175 4176 return this 4177 4178 def _parse_version(self) -> t.Optional[exp.Version]: 4179 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4180 this = "TIMESTAMP" 4181 elif self._match(TokenType.VERSION_SNAPSHOT): 4182 this = "VERSION" 4183 else: 4184 return None 4185 4186 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4187 kind = self._prev.text.upper() 4188 start = self._parse_bitwise() 4189 self._match_texts(("TO", "AND")) 4190 end = self._parse_bitwise() 4191 expression: t.Optional[exp.Expression] = self.expression( 4192 exp.Tuple, expressions=[start, end] 4193 ) 4194 elif self._match_text_seq("CONTAINED", "IN"): 4195 kind = "CONTAINED IN" 4196 expression = self.expression( 4197 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4198 ) 4199 elif self._match(TokenType.ALL): 4200 kind = "ALL" 4201 expression = None 4202 else: 4203 self._match_text_seq("AS", "OF") 4204 kind = "AS OF" 4205 expression = self._parse_type() 4206 4207 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4208 4209 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4210 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4211 index = self._index 4212 historical_data = None 4213 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4214 this = self._prev.text.upper() 4215 kind = ( 4216 self._match(TokenType.L_PAREN) 4217 and self._match_texts(self.HISTORICAL_DATA_KIND) 4218 and self._prev.text.upper() 4219 ) 4220 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4221 4222 if expression: 4223 self._match_r_paren() 4224 historical_data = self.expression( 4225 exp.HistoricalData, this=this, kind=kind, expression=expression 4226 ) 4227 else: 4228 
self._retreat(index) 4229 4230 return historical_data 4231 4232 def _parse_changes(self) -> t.Optional[exp.Changes]: 4233 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4234 return None 4235 4236 information = self._parse_var(any_token=True) 4237 self._match_r_paren() 4238 4239 return self.expression( 4240 exp.Changes, 4241 information=information, 4242 at_before=self._parse_historical_data(), 4243 end=self._parse_historical_data(), 4244 ) 4245 4246 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4247 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4248 return None 4249 4250 self._advance() 4251 4252 expressions = self._parse_wrapped_csv(self._parse_equality) 4253 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4254 4255 alias = self._parse_table_alias() if with_alias else None 4256 4257 if alias: 4258 if self.dialect.UNNEST_COLUMN_ONLY: 4259 if alias.args.get("columns"): 4260 self.raise_error("Unexpected extra column alias in unnest.") 4261 4262 alias.set("columns", [alias.this]) 4263 alias.set("this", None) 4264 4265 columns = alias.args.get("columns") or [] 4266 if offset and len(expressions) < len(columns): 4267 offset = columns.pop() 4268 4269 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4270 self._match(TokenType.ALIAS) 4271 offset = self._parse_id_var( 4272 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4273 ) or exp.to_identifier("offset") 4274 4275 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4276 4277 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4278 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4279 if not is_derived and not ( 4280 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4281 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4282 ): 4283 return None 4284 4285 expressions = self._parse_csv(self._parse_value) 4286 alias = self._parse_table_alias() 4287 4288 if is_derived: 4289 self._match_r_paren() 4290 4291 return self.expression( 4292 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4293 ) 4294
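# Example — `_parse_unnest` above also captures BigQuery's WITH OFFSET alias, so
# the ordinality column round-trips. A minimal doctest-style sketch, assuming
# the public `sqlglot.parse_one` helper:
#
#     >>> import sqlglot
#     >>> sql = "SELECT x FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos"
#     >>> sqlglot.parse_one(sql, read="bigquery").args["from"].this.sql("bigquery")
#     'UNNEST([1, 2]) AS x WITH OFFSET AS pos'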
4295 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4296 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4297 as_modifier and self._match_text_seq("USING", "SAMPLE") 4298 ): 4299 return None 4300 4301 bucket_numerator = None 4302 bucket_denominator = None 4303 bucket_field = None 4304 percent = None 4305 size = None 4306 seed = None 4307 4308 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4309 matched_l_paren = self._match(TokenType.L_PAREN) 4310 4311 if self.TABLESAMPLE_CSV: 4312 num = None 4313 expressions = self._parse_csv(self._parse_primary) 4314 else: 4315 expressions = None 4316 num = ( 4317 self._parse_factor() 4318 if self._match(TokenType.NUMBER, advance=False) 4319 else self._parse_primary() or self._parse_placeholder() 4320 ) 4321 4322 if self._match_text_seq("BUCKET"): 4323 bucket_numerator = self._parse_number() 4324 self._match_text_seq("OUT", "OF") 4325 bucket_denominator = self._parse_number() 4326 self._match(TokenType.ON) 4327 bucket_field = self._parse_field() 4328 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4329 percent = num 4330 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4331 size = num 4332 else: 4333 percent = num 4334 4335 if matched_l_paren: 4336 self._match_r_paren() 4337 4338 if self._match(TokenType.L_PAREN): 4339 method = self._parse_var(upper=True) 4340 seed = self._match(TokenType.COMMA) and self._parse_number() 4341 self._match_r_paren() 4342 elif self._match_texts(("SEED", "REPEATABLE")): 4343 seed = self._parse_wrapped(self._parse_number) 4344 4345 if not method and self.DEFAULT_SAMPLING_METHOD: 4346 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4347 4348 return self.expression( 4349 exp.TableSample, 4350 expressions=expressions, 4351 method=method, 4352 bucket_numerator=bucket_numerator, 4353 bucket_denominator=bucket_denominator, 4354 bucket_field=bucket_field, 4355 percent=percent, 4356 size=size, 4357 seed=seed, 4358 ) 4359 4360 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4361 return list(iter(self._parse_pivot, None)) or None 4362 4363 def _parse_joins(self) -> t.Iterator[exp.Join]: 4364 return iter(self._parse_join, None) 4365 4366 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4367 if not self._match(TokenType.INTO): 4368 return None 4369 4370 return self.expression( 4371 exp.UnpivotColumns, 4372 this=self._match_text_seq("NAME") and self._parse_column(), 4373 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4374 ) 4375 4376 # https://duckdb.org/docs/sql/statements/pivot 4377 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4378 def _parse_on() -> t.Optional[exp.Expression]: 4379 this = self._parse_bitwise() 4380 4381 if self._match(TokenType.IN): 4382 # PIVOT ... ON col IN (row_val1, row_val2) 4383 return self._parse_in(this) 4384 if self._match(TokenType.ALIAS, advance=False): 4385 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4386 return self._parse_alias(this) 4387 4388 return this 4389 4390 this = self._parse_table() 4391 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4392 into = self._parse_unpivot_columns() 4393 using = self._match(TokenType.USING) and self._parse_csv( 4394 lambda: self._parse_alias(self._parse_function()) 4395 ) 4396 group = self._parse_group() 4397 4398 return self.expression( 4399 exp.Pivot, 4400 this=this, 4401 expressions=expressions, 4402 using=using, 4403 group=group, 4404 unpivot=is_unpivot, 4405 into=into, 4406 ) 4407 4408 def _parse_pivot_in(self) -> exp.In: 4409 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4410 this = self._parse_select_or_expression() 4411 4412 self._match(TokenType.ALIAS) 4413 alias = self._parse_bitwise() 4414 if alias: 4415 if isinstance(alias, exp.Column) and not alias.db: 4416 alias = alias.this 4417 return self.expression(exp.PivotAlias, this=this, alias=alias) 4418 4419 return this 4420 4421 value = self._parse_column() 4422 4423 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4424 self.raise_error("Expecting IN (") 4425 4426 if self._match(TokenType.ANY): 4427 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4428 else: 4429 exprs = self._parse_csv(_parse_aliased_expression) 4430 4431 self._match_r_paren() 4432 return self.expression(exp.In, this=value, expressions=exprs) 4433 4434 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4435 func = self._parse_function() 4436 if not func: 4437 if self._prev and self._prev.token_type == TokenType.COMMA: 4438 return None 4439 self.raise_error("Expecting an aggregation function in PIVOT") 4440 4441 return self._parse_alias(func) 4442 4443 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4444 index =
self._index 4445 include_nulls = None 4446 4447 if self._match(TokenType.PIVOT): 4448 unpivot = False 4449 elif self._match(TokenType.UNPIVOT): 4450 unpivot = True 4451 4452 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4453 if self._match_text_seq("INCLUDE", "NULLS"): 4454 include_nulls = True 4455 elif self._match_text_seq("EXCLUDE", "NULLS"): 4456 include_nulls = False 4457 else: 4458 return None 4459 4460 expressions = [] 4461 4462 if not self._match(TokenType.L_PAREN): 4463 self._retreat(index) 4464 return None 4465 4466 if unpivot: 4467 expressions = self._parse_csv(self._parse_column) 4468 else: 4469 expressions = self._parse_csv(self._parse_pivot_aggregation) 4470 4471 if not expressions: 4472 self.raise_error("Failed to parse PIVOT's aggregation list") 4473 4474 if not self._match(TokenType.FOR): 4475 self.raise_error("Expecting FOR") 4476 4477 fields = [] 4478 while True: 4479 field = self._try_parse(self._parse_pivot_in) 4480 if not field: 4481 break 4482 fields.append(field) 4483 4484 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4485 self._parse_bitwise 4486 ) 4487 4488 group = self._parse_group() 4489 4490 self._match_r_paren() 4491 4492 pivot = self.expression( 4493 exp.Pivot, 4494 expressions=expressions, 4495 fields=fields, 4496 unpivot=unpivot, 4497 include_nulls=include_nulls, 4498 default_on_null=default_on_null, 4499 group=group, 4500 ) 4501 4502 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4503 pivot.set("alias", self._parse_table_alias()) 4504 4505 if not unpivot: 4506 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4507 4508 columns: t.List[exp.Expression] = [] 4509 all_fields = [] 4510 for pivot_field in pivot.fields: 4511 pivot_field_expressions = pivot_field.expressions 4512 4513 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4514 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4515 continue 4516 4517 all_fields.append( 4518 [ 4519 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4520 for fld in pivot_field_expressions 4521 ] 4522 ) 4523 4524 if all_fields: 4525 if names: 4526 all_fields.append(names) 4527 4528 # Generate all possible combinations of the pivot columns 4529 # e.g PIVOT(sum(...) 
as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4530 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4531 for fld_parts_tuple in itertools.product(*all_fields): 4532 fld_parts = list(fld_parts_tuple) 4533 4534 if names and self.PREFIXED_PIVOT_COLUMNS: 4535 # Move the "name" to the front of the list 4536 fld_parts.insert(0, fld_parts.pop(-1)) 4537 4538 columns.append(exp.to_identifier("_".join(fld_parts))) 4539 4540 pivot.set("columns", columns) 4541 4542 return pivot 4543 4544 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4545 return [agg.alias for agg in aggregations if agg.alias] 4546 4547 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4548 if not skip_where_token and not self._match(TokenType.PREWHERE): 4549 return None 4550 4551 return self.expression( 4552 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4553 ) 4554 4555 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4556 if not skip_where_token and not self._match(TokenType.WHERE): 4557 return None 4558 4559 return self.expression( 4560 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4561 ) 4562 4563 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4564 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4565 return None 4566 comments = self._prev_comments 4567 4568 elements: t.Dict[str, t.Any] = defaultdict(list) 4569 4570 if self._match(TokenType.ALL): 4571 elements["all"] = True 4572 elif self._match(TokenType.DISTINCT): 4573 elements["all"] = False 4574 4575 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4576 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4577 4578 while True: 4579 index = self._index 4580 4581 elements["expressions"].extend( 4582 self._parse_csv( 4583 lambda: None 4584 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4585 else self._parse_assignment() 4586 ) 4587 ) 4588 4589 before_with_index = self._index 4590 with_prefix = self._match(TokenType.WITH) 4591 4592 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 4593 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 4594 elements[key].append(cube_or_rollup) 4595 elif grouping_sets := self._parse_grouping_sets(): 4596 elements["grouping_sets"].append(grouping_sets) 4597 elif self._match_text_seq("TOTALS"): 4598 elements["totals"] = True # type: ignore 4599 4600 if before_with_index <= self._index <= before_with_index + 1: 4601 self._retreat(before_with_index) 4602 break 4603 4604 if index == self._index: 4605 break 4606 4607 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4608 4609 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: 4610 if self._match(TokenType.CUBE): 4611 kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube 4612 elif self._match(TokenType.ROLLUP): 4613 kind = exp.Rollup 4614 else: 4615 return None 4616 4617 return self.expression( 4618 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4619 ) 4620 4621 def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]: 4622 if self._match(TokenType.GROUPING_SETS): 4623 return self.expression( 4624 exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set) 4625 ) 4626 return None 4627 4628 def _parse_grouping_set(self) 
-> t.Optional[exp.Expression]: 4629 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 4630 4631 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4632 if not skip_having_token and not self._match(TokenType.HAVING): 4633 return None 4634 return self.expression( 4635 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4636 ) 4637 4638 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4639 if not self._match(TokenType.QUALIFY): 4640 return None 4641 return self.expression(exp.Qualify, this=self._parse_assignment()) 4642 4643 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4644 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4645 exp.Prior, this=self._parse_bitwise() 4646 ) 4647 connect = self._parse_assignment() 4648 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4649 return connect 4650 4651 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4652 if skip_start_token: 4653 start = None 4654 elif self._match(TokenType.START_WITH): 4655 start = self._parse_assignment() 4656 else: 4657 return None 4658 4659 self._match(TokenType.CONNECT_BY) 4660 nocycle = self._match_text_seq("NOCYCLE") 4661 connect = self._parse_connect_with_prior() 4662 4663 if not start and self._match(TokenType.START_WITH): 4664 start = self._parse_assignment() 4665 4666 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4667 4668 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4669 this = self._parse_id_var(any_token=True) 4670 if self._match(TokenType.ALIAS): 4671 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4672 return this 4673 4674 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4675 if self._match_text_seq("INTERPOLATE"): 4676 return self._parse_wrapped_csv(self._parse_name_as_expression) 4677 return None 4678 4679 def _parse_order( 4680 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4681 ) -> t.Optional[exp.Expression]: 4682 siblings = None 4683 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4684 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4685 return this 4686 4687 siblings = True 4688 4689 return self.expression( 4690 exp.Order, 4691 comments=self._prev_comments, 4692 this=this, 4693 expressions=self._parse_csv(self._parse_ordered), 4694 siblings=siblings, 4695 ) 4696 4697 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4698 if not self._match(token): 4699 return None 4700 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4701 4702 def _parse_ordered( 4703 self, parse_method: t.Optional[t.Callable] = None 4704 ) -> t.Optional[exp.Ordered]: 4705 this = parse_method() if parse_method else self._parse_assignment() 4706 if not this: 4707 return None 4708 4709 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4710 this = exp.var("ALL") 4711 4712 asc = self._match(TokenType.ASC) 4713 desc = self._match(TokenType.DESC) or (asc and False) 4714 4715 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4716 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4717 4718 nulls_first = is_nulls_first or False 4719 explicitly_null_ordered = is_nulls_first or is_nulls_last 4720 4721 if ( 4722 not explicitly_null_ordered 4723 and ( 4724 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4725 or (desc and 
self.dialect.NULL_ORDERING != "nulls_are_small") 4726 ) 4727 and self.dialect.NULL_ORDERING != "nulls_are_last" 4728 ): 4729 nulls_first = True 4730 4731 if self._match_text_seq("WITH", "FILL"): 4732 with_fill = self.expression( 4733 exp.WithFill, 4734 **{ # type: ignore 4735 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4736 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4737 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4738 "interpolate": self._parse_interpolate(), 4739 }, 4740 ) 4741 else: 4742 with_fill = None 4743 4744 return self.expression( 4745 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4746 ) 4747 4748 def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]: 4749 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 4750 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4751 self._match_text_seq("ONLY") 4752 with_ties = self._match_text_seq("WITH", "TIES") 4753 4754 if not (percent or rows or with_ties): 4755 return None 4756 4757 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4758
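# Example — `_parse_limit_options` captures the PERCENT / ROW(S) [ONLY] / WITH TIES
# trailers shared by LIMIT and FETCH, which `_parse_limit` below stores on the
# resulting node. A minimal doctest-style sketch, assuming the public
# `sqlglot.parse_one` helper:
#
#     >>> import sqlglot
#     >>> fetch = sqlglot.parse_one("SELECT * FROM t FETCH FIRST 5 ROWS ONLY").args["limit"]
#     >>> fetch.args["direction"], fetch.args["count"].sql()
#     ('FIRST', '5')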
4759 def _parse_limit( 4760 self, 4761 this: t.Optional[exp.Expression] = None, 4762 top: bool = False, 4763 skip_limit_token: bool = False, 4764 ) -> t.Optional[exp.Expression]: 4765 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4766 comments = self._prev_comments 4767 if top: 4768 limit_paren = self._match(TokenType.L_PAREN) 4769 expression = self._parse_term() if limit_paren else self._parse_number() 4770 4771 if limit_paren: 4772 self._match_r_paren() 4773 4774 else: 4775 # Parsing LIMIT x% (i.e. x PERCENT) as a term leads to an error, since 4776 # we try to build an exp.Mod expr. To handle that, we backtrack and instead 4777 # consume the factor and parse the percentage separately 4778 expression = self._try_parse(self._parse_term) or self._parse_factor() 4779 4780 limit_options = self._parse_limit_options() 4781 4782 if self._match(TokenType.COMMA): 4783 offset = expression 4784 expression = self._parse_term() 4785 else: 4786 offset = None 4787 4788 limit_exp = self.expression( 4789 exp.Limit, 4790 this=this, 4791 expression=expression, 4792 offset=offset, 4793 comments=comments, 4794 limit_options=limit_options, 4795 expressions=self._parse_limit_by(), 4796 ) 4797 4798 return limit_exp 4799 4800 if self._match(TokenType.FETCH): 4801 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4802 direction = self._prev.text.upper() if direction else "FIRST" 4803 4804 count = self._parse_field(tokens=self.FETCH_TOKENS) 4805 4806 return self.expression( 4807 exp.Fetch, 4808 direction=direction, 4809 count=count, 4810 limit_options=self._parse_limit_options(), 4811 ) 4812 4813 return this 4814 4815 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4816 if not self._match(TokenType.OFFSET): 4817 return this 4818 4819 count = self._parse_term() 4820 self._match_set((TokenType.ROW, TokenType.ROWS)) 4821 4822 return self.expression( 4823 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4824 ) 4825 4826 def _can_parse_limit_or_offset(self) -> bool: 4827 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4828 return False 4829 4830 index = self._index 4831 result = bool( 4832 self._try_parse(self._parse_limit, retreat=True) 4833 or self._try_parse(self._parse_offset, retreat=True) 4834 ) 4835 self._retreat(index) 4836 return result 4837 4838 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4839 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4840 4841 def _parse_locks(self) -> t.List[exp.Lock]: 4842 locks = [] 4843 while True: 4844 update, key = None, None 4845 if self._match_text_seq("FOR", "UPDATE"): 4846 update = True 4847 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4848 "LOCK", "IN", "SHARE", "MODE" 4849 ): 4850 update = False 4851 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4852 update, key = False, True 4853 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4854 update, key = True, True 4855 else: 4856 break 4857 4858 expressions = None 4859 if self._match_text_seq("OF"): 4860 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4861 4862 wait: t.Optional[bool | exp.Expression] = None 4863 if self._match_text_seq("NOWAIT"): 4864 wait = True 4865 elif self._match_text_seq("WAIT"): 4866 wait = self._parse_primary() 4867 elif self._match_text_seq("SKIP", "LOCKED"): 4868 wait = False 4869 4870 locks.append( 4871 self.expression( 4872 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4873 ) 4874 ) 4875 4876 return locks 4877 4878 def parse_set_operation( 4879 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4880 ) -> t.Optional[exp.Expression]: 4881 start = self._index 4882 _, side_token, kind_token = self._parse_join_parts() 4883 4884 side = side_token.text if side_token else None 4885 kind = kind_token.text if kind_token else None 4886 4887 if not self._match_set(self.SET_OPERATIONS): 4888 self._retreat(start) 4889 return None 4890 4891 token_type = self._prev.token_type 4892 4893 if token_type == TokenType.UNION: 4894 operation:
t.Type[exp.SetOperation] = exp.Union 4895 elif token_type == TokenType.EXCEPT: 4896 operation = exp.Except 4897 else: 4898 operation = exp.Intersect 4899 4900 comments = self._prev.comments 4901 4902 if self._match(TokenType.DISTINCT): 4903 distinct: t.Optional[bool] = True 4904 elif self._match(TokenType.ALL): 4905 distinct = False 4906 else: 4907 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4908 if distinct is None: 4909 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4910 4911 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4912 "STRICT", "CORRESPONDING" 4913 ) 4914 if self._match_text_seq("CORRESPONDING"): 4915 by_name = True 4916 if not side and not kind: 4917 kind = "INNER" 4918 4919 on_column_list = None 4920 if by_name and self._match_texts(("ON", "BY")): 4921 on_column_list = self._parse_wrapped_csv(self._parse_column) 4922 4923 expression = self._parse_select( 4924 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4925 ) 4926 4927 return self.expression( 4928 operation, 4929 comments=comments, 4930 this=this, 4931 distinct=distinct, 4932 by_name=by_name, 4933 expression=expression, 4934 side=side, 4935 kind=kind, 4936 on=on_column_list, 4937 ) 4938 4939 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4940 while this: 4941 setop = self.parse_set_operation(this) 4942 if not setop: 4943 break 4944 this = setop 4945 4946 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4947 expression = this.expression 4948 4949 if expression: 4950 for arg in self.SET_OP_MODIFIERS: 4951 expr = expression.args.get(arg) 4952 if expr: 4953 this.set(arg, expr.pop()) 4954 4955 return this 4956 4957 def _parse_expression(self) -> t.Optional[exp.Expression]: 4958 return self._parse_alias(self._parse_assignment()) 4959 4960 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4961 this = self._parse_disjunction() 4962 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4963 # This allows us to parse <non-identifier token> := <expr> 4964 this = exp.column( 4965 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4966 ) 4967 4968 while self._match_set(self.ASSIGNMENT): 4969 if isinstance(this, exp.Column) and len(this.parts) == 1: 4970 this = this.this 4971 4972 this = self.expression( 4973 self.ASSIGNMENT[self._prev.token_type], 4974 this=this, 4975 comments=self._prev_comments, 4976 expression=self._parse_assignment(), 4977 ) 4978 4979 return this 4980 4981 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4982 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4983 4984 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4985 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4986 4987 def _parse_equality(self) -> t.Optional[exp.Expression]: 4988 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4989 4990 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4991 return self._parse_tokens(self._parse_range, self.COMPARISON) 4992 4993 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4994 this = this or self._parse_bitwise() 4995 negate = self._match(TokenType.NOT) 4996 4997 if self._match_set(self.RANGE_PARSERS): 4998 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4999 if not expression: 5000 return this 5001 5002 this = expression 5003 elif self._match(TokenType.ISNULL): 
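# Example — this branch (and the NOTNULL one below) canonicalizes Postgres's
# shorthand into a regular IS [NOT] NULL tree. A minimal doctest-style sketch,
# assuming the public `sqlglot.parse_one` helper:
#
#     >>> import sqlglot
#     >>> q = sqlglot.parse_one("SELECT 1 FROM t WHERE x ISNULL", read="postgres")
#     >>> q.args["where"].this.sql()
#     'x IS NULL'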
5004 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5005 5006 # Postgres supports ISNULL and NOTNULL for conditions. 5007 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5008 if self._match(TokenType.NOTNULL): 5009 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5010 this = self.expression(exp.Not, this=this) 5011 5012 if negate: 5013 this = self._negate_range(this) 5014 5015 if self._match(TokenType.IS): 5016 this = self._parse_is(this) 5017 5018 return this 5019 5020 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5021 if not this: 5022 return this 5023 5024 return self.expression(exp.Not, this=this) 5025 5026 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5027 index = self._index - 1 5028 negate = self._match(TokenType.NOT) 5029 5030 if self._match_text_seq("DISTINCT", "FROM"): 5031 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5032 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5033 5034 if self._match(TokenType.JSON): 5035 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5036 5037 if self._match_text_seq("WITH"): 5038 _with = True 5039 elif self._match_text_seq("WITHOUT"): 5040 _with = False 5041 else: 5042 _with = None 5043 5044 unique = self._match(TokenType.UNIQUE) 5045 self._match_text_seq("KEYS") 5046 expression: t.Optional[exp.Expression] = self.expression( 5047 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5048 ) 5049 else: 5050 expression = self._parse_primary() or self._parse_null() 5051 if not expression: 5052 self._retreat(index) 5053 return None 5054 5055 this = self.expression(exp.Is, this=this, expression=expression) 5056 return self.expression(exp.Not, this=this) if negate else this 5057 5058 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5059 unnest = self._parse_unnest(with_alias=False) 5060 if unnest: 5061 this = self.expression(exp.In, this=this, unnest=unnest) 5062 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5063 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5064 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5065 5066 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5067 this = self.expression( 5068 exp.In, 5069 this=this, 5070 query=self._parse_query_modifiers(query).subquery(copy=False), 5071 ) 5072 else: 5073 this = self.expression(exp.In, this=this, expressions=expressions) 5074 5075 if matched_l_paren: 5076 self._match_r_paren(this) 5077 elif not self._match(TokenType.R_BRACKET, expression=this): 5078 self.raise_error("Expecting ]") 5079 else: 5080 this = self.expression(exp.In, this=this, field=self._parse_column()) 5081 5082 return this 5083 5084 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5085 symmetric = None 5086 if self._match_text_seq("SYMMETRIC"): 5087 symmetric = True 5088 elif self._match_text_seq("ASYMMETRIC"): 5089 symmetric = False 5090 5091 low = self._parse_bitwise() 5092 self._match(TokenType.AND) 5093 high = self._parse_bitwise() 5094 5095 return self.expression( 5096 exp.Between, 5097 this=this, 5098 low=low, 5099 high=high, 5100 symmetric=symmetric, 5101 ) 5102 5103 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5104 if not self._match(TokenType.ESCAPE): 5105 return this 5106 return self.expression(exp.Escape, 
this=this, expression=self._parse_string()) 5107 5108 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5109 index = self._index 5110 5111 if not self._match(TokenType.INTERVAL) and match_interval: 5112 return None 5113 5114 if self._match(TokenType.STRING, advance=False): 5115 this = self._parse_primary() 5116 else: 5117 this = self._parse_term() 5118 5119 if not this or ( 5120 isinstance(this, exp.Column) 5121 and not this.table 5122 and not this.this.quoted 5123 and self._curr 5124 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5125 ): 5126 self._retreat(index) 5127 return None 5128 5129 # handle day-time format interval span with omitted units: 5130 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5131 interval_span_units_omitted = None 5132 if ( 5133 this 5134 and this.is_string 5135 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5136 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5137 ): 5138 index = self._index 5139 5140 # Var "TO" Var 5141 first_unit = self._parse_var(any_token=True, upper=True) 5142 second_unit = None 5143 if first_unit and self._match_text_seq("TO"): 5144 second_unit = self._parse_var(any_token=True, upper=True) 5145 5146 interval_span_units_omitted = not (first_unit and second_unit) 5147 5148 self._retreat(index) 5149 5150 unit = ( 5151 None 5152 if interval_span_units_omitted 5153 else ( 5154 self._parse_function() 5155 or ( 5156 not self._match(TokenType.ALIAS, advance=False) 5157 and self._parse_var(any_token=True, upper=True) 5158 ) 5159 ) 5160 ) 5161 5162 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5163 # each INTERVAL expression into this canonical form so it's easy to transpile 5164 if this and this.is_number: 5165 this = exp.Literal.string(this.to_py()) 5166 elif this and this.is_string: 5167 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5168 if parts and unit: 5169 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5170 unit = None 5171 self._retreat(self._index - 1) 5172 5173 if len(parts) == 1: 5174 this = exp.Literal.string(parts[0][0]) 5175 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5176 5177 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5178 unit = self.expression( 5179 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5180 ) 5181 5182 interval = self.expression(exp.Interval, this=this, unit=unit) 5183 5184 index = self._index 5185 self._match(TokenType.PLUS) 5186 5187 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5188 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5189 return self.expression( 5190 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5191 ) 5192 5193 self._retreat(index) 5194 return interval 5195 5196 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5197 this = self._parse_term() 5198 5199 while True: 5200 if self._match_set(self.BITWISE): 5201 this = self.expression( 5202 self.BITWISE[self._prev.token_type], 5203 this=this, 5204 expression=self._parse_term(), 5205 ) 5206 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5207 this = self.expression( 5208 exp.DPipe, 5209 this=this, 5210 expression=self._parse_term(), 5211 safe=not self.dialect.STRICT_STRING_CONCAT, 5212 ) 5213 elif self._match(TokenType.DQMARK): 5214 this = self.expression( 5215 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5216 ) 5217 elif self._match_pair(TokenType.LT, TokenType.LT): 5218 this = self.expression( 5219 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5220 ) 5221 elif self._match_pair(TokenType.GT, TokenType.GT): 5222 this = self.expression( 5223 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5224 ) 5225 else: 5226 break 5227 5228 return this 5229 5230 def _parse_term(self) -> t.Optional[exp.Expression]: 5231 this = self._parse_factor() 5232 5233 while self._match_set(self.TERM): 5234 klass = self.TERM[self._prev.token_type] 5235 comments = self._prev_comments 5236 expression = self._parse_factor() 5237 5238 this = self.expression(klass, this=this, comments=comments, expression=expression) 5239 5240 if isinstance(this, exp.Collate): 5241 expr = this.expression 5242 5243 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5244 # fallback to Identifier / Var 5245 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5246 ident = expr.this 5247 if isinstance(ident, exp.Identifier): 5248 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5249 5250 return this 5251 5252 def _parse_factor(self) -> t.Optional[exp.Expression]: 5253 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5254 this = parse_method() 5255 5256 while self._match_set(self.FACTOR): 5257 klass = self.FACTOR[self._prev.token_type] 5258 comments = self._prev_comments 5259 expression = parse_method() 5260 5261 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5262 self._retreat(self._index - 1) 5263 return this 5264 5265 this = self.expression(klass, this=this, comments=comments, expression=expression) 5266 5267 if isinstance(this, exp.Div): 5268 this.args["typed"] = self.dialect.TYPED_DIVISION 5269 this.args["safe"] = self.dialect.SAFE_DIVISION 5270 5271 return this 5272 5273 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5274 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5275 5276 def _parse_unary(self) -> t.Optional[exp.Expression]: 5277 if self._match_set(self.UNARY_PARSERS): 5278 return self.UNARY_PARSERS[self._prev.token_type](self) 5279 return self._parse_at_time_zone(self._parse_type()) 5280 5281 def _parse_type( 5282 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5283 ) -> t.Optional[exp.Expression]: 5284 interval = parse_interval and self._parse_interval() 5285 if interval: 5286 return interval 5287 5288 index = self._index 5289 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5290 
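        # Illustrative sketch, not part of the parser source: the _parse_interval call above
        # canonicalizes intervals into the INTERVAL '<value>' <unit> form, so INTERVAL 5 DAY
        # and INTERVAL '5' DAY yield the same tree. A minimal example, assuming the public
        # sqlglot API:
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     interval = sqlglot.parse_one("SELECT INTERVAL 5 DAY").find(exp.Interval)
        #     assert interval.this.is_string         # the value is stored as a string literal
        #     assert interval.text("unit") == "DAY"  # the unit is stored as an uppercased Var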
5291        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
5292        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
5293        if isinstance(data_type, exp.Cast):
5294            # This constructor can contain ops directly after it, for instance struct unnesting:
5295            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
5296            return self._parse_column_ops(data_type)
5297
5298        if data_type:
5299            index2 = self._index
5300            this = self._parse_primary()
5301
5302            if isinstance(this, exp.Literal):
5303                literal = this.name
5304                this = self._parse_column_ops(this)
5305
5306                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
5307                if parser:
5308                    return parser(self, this, data_type)
5309
5310                if (
5311                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
5312                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
5313                    and TIME_ZONE_RE.search(literal)
5314                ):
5315                    data_type = exp.DataType.build("TIMESTAMPTZ")
5316
5317                return self.expression(exp.Cast, this=this, to=data_type)
5318
5319            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
5320            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
5321            #
5322            # If the index difference here is greater than 1, that means the parser itself must have
5323            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
5324            #
5325            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
5326            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
5327            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
5328            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
5329            #
5330            # In these cases, we don't really want to return the converted type, but instead retreat
5331            # and try to parse a Column or Identifier in the section below.
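            # Illustrative sketch of the TYPE_CONVERTERS case described above, assuming the
            # public sqlglot API and Snowflake's converter, which expands a bare DECIMAL:
            #
            #     import sqlglot
            #
            #     ast = sqlglot.parse_one("SELECT CAST(x AS DECIMAL)", read="snowflake")
            #     print(ast.sql(dialect="snowflake"))
            #     # e.g. SELECT CAST(x AS DECIMAL(38, 0))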
5332 if data_type.expressions and index2 - index > 1: 5333 self._retreat(index2) 5334 return self._parse_column_ops(data_type) 5335 5336 self._retreat(index) 5337 5338 if fallback_to_identifier: 5339 return self._parse_id_var() 5340 5341 this = self._parse_column() 5342 return this and self._parse_column_ops(this) 5343 5344 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5345 this = self._parse_type() 5346 if not this: 5347 return None 5348 5349 if isinstance(this, exp.Column) and not this.table: 5350 this = exp.var(this.name.upper()) 5351 5352 return self.expression( 5353 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5354 ) 5355 5356 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5357 type_name = identifier.name 5358 5359 while self._match(TokenType.DOT): 5360 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5361 5362 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5363 5364 def _parse_types( 5365 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5366 ) -> t.Optional[exp.Expression]: 5367 index = self._index 5368 5369 this: t.Optional[exp.Expression] = None 5370 prefix = self._match_text_seq("SYSUDTLIB", ".") 5371 5372 if self._match_set(self.TYPE_TOKENS): 5373 type_token = self._prev.token_type 5374 else: 5375 type_token = None 5376 identifier = allow_identifiers and self._parse_id_var( 5377 any_token=False, tokens=(TokenType.VAR,) 5378 ) 5379 if isinstance(identifier, exp.Identifier): 5380 try: 5381 tokens = self.dialect.tokenize(identifier.name) 5382 except TokenError: 5383 tokens = None 5384 5385 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5386 type_token = tokens[0].token_type 5387 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5388 this = self._parse_user_defined_type(identifier) 5389 else: 5390 self._retreat(self._index - 1) 5391 return None 5392 else: 5393 return None 5394 5395 if type_token == TokenType.PSEUDO_TYPE: 5396 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5397 5398 if type_token == TokenType.OBJECT_IDENTIFIER: 5399 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5400 5401 # https://materialize.com/docs/sql/types/map/ 5402 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5403 key_type = self._parse_types( 5404 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5405 ) 5406 if not self._match(TokenType.FARROW): 5407 self._retreat(index) 5408 return None 5409 5410 value_type = self._parse_types( 5411 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5412 ) 5413 if not self._match(TokenType.R_BRACKET): 5414 self._retreat(index) 5415 return None 5416 5417 return exp.DataType( 5418 this=exp.DataType.Type.MAP, 5419 expressions=[key_type, value_type], 5420 nested=True, 5421 prefix=prefix, 5422 ) 5423 5424 nested = type_token in self.NESTED_TYPE_TOKENS 5425 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5426 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5427 expressions = None 5428 maybe_func = False 5429 5430 if self._match(TokenType.L_PAREN): 5431 if is_struct: 5432 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5433 elif nested: 5434 expressions = self._parse_csv( 5435 lambda: self._parse_types( 5436 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5437 ) 5438 ) 5439 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5440 this = expressions[0] 5441 this.set("nullable", True) 5442 self._match_r_paren() 5443 return this 5444 elif type_token in self.ENUM_TYPE_TOKENS: 5445 expressions = self._parse_csv(self._parse_equality) 5446 elif is_aggregate: 5447 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5448 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5449 ) 5450 if not func_or_ident: 5451 return None 5452 expressions = [func_or_ident] 5453 if self._match(TokenType.COMMA): 5454 expressions.extend( 5455 self._parse_csv( 5456 lambda: self._parse_types( 5457 check_func=check_func, 5458 schema=schema, 5459 allow_identifiers=allow_identifiers, 5460 ) 5461 ) 5462 ) 5463 else: 5464 expressions = self._parse_csv(self._parse_type_size) 5465 5466 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5467 if type_token == TokenType.VECTOR and len(expressions) == 2: 5468 expressions = self._parse_vector_expressions(expressions) 5469 5470 if not self._match(TokenType.R_PAREN): 5471 self._retreat(index) 5472 return None 5473 5474 maybe_func = True 5475 5476 values: t.Optional[t.List[exp.Expression]] = None 5477 5478 if nested and self._match(TokenType.LT): 5479 if is_struct: 5480 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5481 else: 5482 expressions = self._parse_csv( 5483 lambda: self._parse_types( 5484 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5485 ) 5486 ) 5487 5488 if not self._match(TokenType.GT): 5489 self.raise_error("Expecting >") 5490 5491 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5492 values = self._parse_csv(self._parse_assignment) 5493 if not values and is_struct: 5494 values = None 5495 self._retreat(self._index - 1) 5496 else: 5497 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5498 5499 if type_token in self.TIMESTAMPS: 5500 if self._match_text_seq("WITH", "TIME", "ZONE"): 5501 maybe_func = False 5502 tz_type = ( 5503 exp.DataType.Type.TIMETZ 5504 if type_token in self.TIMES 5505 else exp.DataType.Type.TIMESTAMPTZ 5506 ) 5507 this = exp.DataType(this=tz_type, expressions=expressions) 5508 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5509 maybe_func = False 5510 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5511 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5512 maybe_func = False 5513 elif type_token == TokenType.INTERVAL: 5514 unit = self._parse_var(upper=True) 5515 if unit: 5516 if self._match_text_seq("TO"): 5517 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5518 5519 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5520 else: 5521 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5522 elif type_token == TokenType.VOID: 5523 this = exp.DataType(this=exp.DataType.Type.NULL) 5524 5525 if maybe_func and check_func: 5526 index2 = self._index 5527 peek = self._parse_string() 5528 5529 if not peek: 5530 self._retreat(index) 5531 return None 5532 5533 self._retreat(index2) 5534 5535 if not this: 5536 if self._match_text_seq("UNSIGNED"): 5537 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5538 if not unsigned_type_token: 5539 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5540 5541 type_token = unsigned_type_token or type_token 5542 5543 # NULLABLE without parentheses can be a column (Presto/Trino) 5544 if type_token == 
TokenType.NULLABLE and not expressions: 5545 self._retreat(index) 5546 return None 5547 5548 this = exp.DataType( 5549 this=exp.DataType.Type[type_token.value], 5550 expressions=expressions, 5551 nested=nested, 5552 prefix=prefix, 5553 ) 5554 5555 # Empty arrays/structs are allowed 5556 if values is not None: 5557 cls = exp.Struct if is_struct else exp.Array 5558 this = exp.cast(cls(expressions=values), this, copy=False) 5559 5560 elif expressions: 5561 this.set("expressions", expressions) 5562 5563 # https://materialize.com/docs/sql/types/list/#type-name 5564 while self._match(TokenType.LIST): 5565 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5566 5567 index = self._index 5568 5569 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5570 matched_array = self._match(TokenType.ARRAY) 5571 5572 while self._curr: 5573 datatype_token = self._prev.token_type 5574 matched_l_bracket = self._match(TokenType.L_BRACKET) 5575 5576 if (not matched_l_bracket and not matched_array) or ( 5577 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5578 ): 5579 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5580 # not to be confused with the fixed size array parsing 5581 break 5582 5583 matched_array = False 5584 values = self._parse_csv(self._parse_assignment) or None 5585 if ( 5586 values 5587 and not schema 5588 and ( 5589 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5590 ) 5591 ): 5592 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5593 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5594 self._retreat(index) 5595 break 5596 5597 this = exp.DataType( 5598 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5599 ) 5600 self._match(TokenType.R_BRACKET) 5601 5602 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5603 converter = self.TYPE_CONVERTERS.get(this.this) 5604 if converter: 5605 this = converter(t.cast(exp.DataType, this)) 5606 5607 return this 5608 5609 def _parse_vector_expressions( 5610 self, expressions: t.List[exp.Expression] 5611 ) -> t.List[exp.Expression]: 5612 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5613 5614 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5615 index = self._index 5616 5617 if ( 5618 self._curr 5619 and self._next 5620 and self._curr.token_type in self.TYPE_TOKENS 5621 and self._next.token_type in self.TYPE_TOKENS 5622 ): 5623 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5624 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5625 this = self._parse_id_var() 5626 else: 5627 this = ( 5628 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5629 or self._parse_id_var() 5630 ) 5631 5632 self._match(TokenType.COLON) 5633 5634 if ( 5635 type_required 5636 and not isinstance(this, exp.DataType) 5637 and not self._match_set(self.TYPE_TOKENS, advance=False) 5638 ): 5639 self._retreat(index) 5640 return self._parse_types() 5641 5642 return self._parse_column_def(this) 5643 5644 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5645 if not self._match_text_seq("AT", "TIME", "ZONE"): 5646 return this 5647 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5648 5649 def _parse_column(self) -> t.Optional[exp.Expression]: 5650 this = self._parse_column_reference() 5651 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5652 5653 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5654 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5655 5656 return column 5657 5658 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5659 this = self._parse_field() 5660 if ( 5661 not this 5662 and self._match(TokenType.VALUES, advance=False) 5663 and self.VALUES_FOLLOWED_BY_PAREN 5664 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5665 ): 5666 this = self._parse_id_var() 5667 5668 if isinstance(this, exp.Identifier): 5669 # We bubble up comments from the Identifier to the Column 5670 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5671 5672 return this 5673 5674 def _parse_colon_as_variant_extract( 5675 self, this: t.Optional[exp.Expression] 5676 ) -> t.Optional[exp.Expression]: 5677 casts = [] 5678 json_path = [] 5679 escape = None 5680 5681 while self._match(TokenType.COLON): 5682 start_index = self._index 5683 5684 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5685 path = self._parse_column_ops( 5686 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5687 ) 5688 5689 # The cast :: operator has a lower precedence than the extraction operator :, so 5690 # we rearrange the AST appropriately to avoid casting the JSON path 5691 while isinstance(path, exp.Cast): 5692 casts.append(path.to) 5693 path = path.this 5694 5695 if casts: 5696 dcolon_offset = next( 5697 i 5698 for i, t in enumerate(self._tokens[start_index:]) 5699 if t.token_type == TokenType.DCOLON 5700 ) 5701 end_token = self._tokens[start_index + dcolon_offset - 1] 5702 else: 5703 end_token = self._prev 5704 5705 if path: 5706 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5707 # it'll roundtrip to a string literal in GET_PATH 5708 if isinstance(path, exp.Identifier) and path.quoted: 5709 escape = True 5710 5711 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5712 5713 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5714 # Databricks transforms it back to the colon/dot notation 5715 if json_path: 5716 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5717 5718 if json_path_expr: 5719 json_path_expr.set("escape", escape) 5720 5721 this = self.expression( 5722 exp.JSONExtract, 5723 this=this, 5724 expression=json_path_expr, 5725 variant_extract=True, 5726 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5727 ) 5728 5729 while casts: 5730 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5731 5732 return this 5733 5734 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5735 return self._parse_types() 5736 5737 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5738 this = self._parse_bracket(this) 5739 5740 while self._match_set(self.COLUMN_OPERATORS): 5741 op_token = self._prev.token_type 5742 op = self.COLUMN_OPERATORS.get(op_token) 5743 5744 if op_token in self.CAST_COLUMN_OPERATORS: 5745 field = self._parse_dcolon() 5746 if not field: 5747 self.raise_error("Expected type") 5748 elif op and self._curr: 5749 field = self._parse_column_reference() or self._parse_bitwise() 5750 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5751 field = self._parse_column_ops(field) 5752 else: 5753 field = self._parse_field(any_token=True, anonymous_func=True) 5754 5755 # Function calls can be qualified, e.g., x.y.FOO() 5756 # This converts the final AST to a series of Dots leading to the function call 5757 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5758 if isinstance(field, (exp.Func, exp.Window)) and this: 5759 this = this.transform( 5760 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5761 ) 5762 5763 if op: 5764 this = op(self, this, field) 5765 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5766 this = self.expression( 5767 exp.Column, 5768 comments=this.comments, 5769 this=field, 5770 table=this.this, 5771 db=this.args.get("table"), 5772 catalog=this.args.get("db"), 5773 ) 5774 elif isinstance(field, exp.Window): 5775 # Move the exp.Dot's to the window's function 5776 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5777 field.set("this", window_func) 5778 this = field 5779 else: 5780 this = self.expression(exp.Dot, this=this, expression=field) 5781 5782 if field and field.comments: 5783 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5784 5785 this = self._parse_bracket(this) 5786 5787 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5788 5789 def _parse_paren(self) -> t.Optional[exp.Expression]: 5790 if not self._match(TokenType.L_PAREN): 5791 return None 5792 5793 comments = self._prev_comments 5794 query = self._parse_select() 5795 5796 if query: 5797 expressions = [query] 5798 else: 5799 expressions = self._parse_expressions() 5800 5801 this = seq_get(expressions, 0) 5802 5803 if not this and self._match(TokenType.R_PAREN, advance=False): 5804 this = self.expression(exp.Tuple) 5805 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5806 this = 
self._parse_subquery(this=this, parse_alias=False) 5807 elif isinstance(this, exp.Subquery): 5808 this = self._parse_subquery( 5809 this=self._parse_query_modifiers(self._parse_set_operations(this)), 5810 parse_alias=False, 5811 ) 5812 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5813 this = self.expression(exp.Tuple, expressions=expressions) 5814 else: 5815 this = self.expression(exp.Paren, this=this) 5816 5817 if this: 5818 this.add_comments(comments) 5819 5820 self._match_r_paren(expression=this) 5821 5822 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5823 return self._parse_window(this) 5824 5825 return this 5826 5827 def _parse_primary(self) -> t.Optional[exp.Expression]: 5828 if self._match_set(self.PRIMARY_PARSERS): 5829 token_type = self._prev.token_type 5830 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5831 5832 if token_type == TokenType.STRING: 5833 expressions = [primary] 5834 while self._match(TokenType.STRING): 5835 expressions.append(exp.Literal.string(self._prev.text)) 5836 5837 if len(expressions) > 1: 5838 return self.expression(exp.Concat, expressions=expressions) 5839 5840 return primary 5841 5842 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5843 return exp.Literal.number(f"0.{self._prev.text}") 5844 5845 return self._parse_paren() 5846 5847 def _parse_field( 5848 self, 5849 any_token: bool = False, 5850 tokens: t.Optional[t.Collection[TokenType]] = None, 5851 anonymous_func: bool = False, 5852 ) -> t.Optional[exp.Expression]: 5853 if anonymous_func: 5854 field = ( 5855 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5856 or self._parse_primary() 5857 ) 5858 else: 5859 field = self._parse_primary() or self._parse_function( 5860 anonymous=anonymous_func, any_token=any_token 5861 ) 5862 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5863 5864 def _parse_function( 5865 self, 5866 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5867 anonymous: bool = False, 5868 optional_parens: bool = True, 5869 any_token: bool = False, 5870 ) -> t.Optional[exp.Expression]: 5871 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5872 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5873 fn_syntax = False 5874 if ( 5875 self._match(TokenType.L_BRACE, advance=False) 5876 and self._next 5877 and self._next.text.upper() == "FN" 5878 ): 5879 self._advance(2) 5880 fn_syntax = True 5881 5882 func = self._parse_function_call( 5883 functions=functions, 5884 anonymous=anonymous, 5885 optional_parens=optional_parens, 5886 any_token=any_token, 5887 ) 5888 5889 if fn_syntax: 5890 self._match(TokenType.R_BRACE) 5891 5892 return func 5893 5894 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5895 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5896 5897 def _parse_function_call( 5898 self, 5899 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5900 anonymous: bool = False, 5901 optional_parens: bool = True, 5902 any_token: bool = False, 5903 ) -> t.Optional[exp.Expression]: 5904 if not self._curr: 5905 return None 5906 5907 comments = self._curr.comments 5908 prev = self._prev 5909 token = self._curr 5910 token_type = self._curr.token_type 5911 this = self._curr.text 5912 upper = this.upper() 5913 5914 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5915 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5916 self._advance() 5917 return 
self._parse_window(parser(self)) 5918 5919 if not self._next or self._next.token_type != TokenType.L_PAREN: 5920 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5921 self._advance() 5922 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5923 5924 return None 5925 5926 if any_token: 5927 if token_type in self.RESERVED_TOKENS: 5928 return None 5929 elif token_type not in self.FUNC_TOKENS: 5930 return None 5931 5932 self._advance(2) 5933 5934 parser = self.FUNCTION_PARSERS.get(upper) 5935 if parser and not anonymous: 5936 this = parser(self) 5937 else: 5938 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5939 5940 if subquery_predicate: 5941 expr = None 5942 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5943 expr = self._parse_select() 5944 self._match_r_paren() 5945 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5946 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5947 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5948 self._advance(-1) 5949 expr = self._parse_bitwise() 5950 5951 if expr: 5952 return self.expression(subquery_predicate, comments=comments, this=expr) 5953 5954 if functions is None: 5955 functions = self.FUNCTIONS 5956 5957 function = functions.get(upper) 5958 known_function = function and not anonymous 5959 5960 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5961 args = self._parse_function_args(alias) 5962 5963 post_func_comments = self._curr and self._curr.comments 5964 if known_function and post_func_comments: 5965 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5966 # call we'll construct it as exp.Anonymous, even if it's "known" 5967 if any( 5968 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5969 for comment in post_func_comments 5970 ): 5971 known_function = False 5972 5973 if alias and known_function: 5974 args = self._kv_to_prop_eq(args) 5975 5976 if known_function: 5977 func_builder = t.cast(t.Callable, function) 5978 5979 if "dialect" in func_builder.__code__.co_varnames: 5980 func = func_builder(args, dialect=self.dialect) 5981 else: 5982 func = func_builder(args) 5983 5984 func = self.validate_expression(func, args) 5985 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5986 func.meta["name"] = this 5987 5988 this = func 5989 else: 5990 if token_type == TokenType.IDENTIFIER: 5991 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5992 5993 this = self.expression(exp.Anonymous, this=this, expressions=args) 5994 this = this.update_positions(token) 5995 5996 if isinstance(this, exp.Expression): 5997 this.add_comments(comments) 5998 5999 self._match_r_paren(this) 6000 return self._parse_window(this) 6001 6002 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 6003 return expression 6004 6005 def _kv_to_prop_eq( 6006 self, expressions: t.List[exp.Expression], parse_map: bool = False 6007 ) -> t.List[exp.Expression]: 6008 transformed = [] 6009 6010 for index, e in enumerate(expressions): 6011 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6012 if isinstance(e, exp.Alias): 6013 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6014 6015 if not isinstance(e, exp.PropertyEQ): 6016 e = self.expression( 6017 exp.PropertyEQ, 6018 this=e.this if parse_map else exp.to_identifier(e.this.name), 6019 expression=e.expression, 6020 ) 6021 6022 if isinstance(e.this, exp.Column): 6023 e.this.replace(e.this.this) 
6024 else: 6025 e = self._to_prop_eq(e, index) 6026 6027 transformed.append(e) 6028 6029 return transformed 6030 6031 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6032 return self._parse_statement() 6033 6034 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6035 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6036 6037 def _parse_user_defined_function( 6038 self, kind: t.Optional[TokenType] = None 6039 ) -> t.Optional[exp.Expression]: 6040 this = self._parse_table_parts(schema=True) 6041 6042 if not self._match(TokenType.L_PAREN): 6043 return this 6044 6045 expressions = self._parse_csv(self._parse_function_parameter) 6046 self._match_r_paren() 6047 return self.expression( 6048 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6049 ) 6050 6051 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6052 literal = self._parse_primary() 6053 if literal: 6054 return self.expression(exp.Introducer, this=token.text, expression=literal) 6055 6056 return self._identifier_expression(token) 6057 6058 def _parse_session_parameter(self) -> exp.SessionParameter: 6059 kind = None 6060 this = self._parse_id_var() or self._parse_primary() 6061 6062 if this and self._match(TokenType.DOT): 6063 kind = this.name 6064 this = self._parse_var() or self._parse_primary() 6065 6066 return self.expression(exp.SessionParameter, this=this, kind=kind) 6067 6068 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6069 return self._parse_id_var() 6070 6071 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6072 index = self._index 6073 6074 if self._match(TokenType.L_PAREN): 6075 expressions = t.cast( 6076 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6077 ) 6078 6079 if not self._match(TokenType.R_PAREN): 6080 self._retreat(index) 6081 else: 6082 expressions = [self._parse_lambda_arg()] 6083 6084 if self._match_set(self.LAMBDAS): 6085 return self.LAMBDAS[self._prev.token_type](self, expressions) 6086 6087 self._retreat(index) 6088 6089 this: t.Optional[exp.Expression] 6090 6091 if self._match(TokenType.DISTINCT): 6092 this = self.expression( 6093 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6094 ) 6095 else: 6096 this = self._parse_select_or_expression(alias=alias) 6097 6098 return self._parse_limit( 6099 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6100 ) 6101 6102 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6103 index = self._index 6104 if not self._match(TokenType.L_PAREN): 6105 return this 6106 6107 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6108 # expr can be of both types 6109 if self._match_set(self.SELECT_START_TOKENS): 6110 self._retreat(index) 6111 return this 6112 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6113 self._match_r_paren() 6114 return self.expression(exp.Schema, this=this, expressions=args) 6115 6116 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6117 return self._parse_column_def(self._parse_field(any_token=True)) 6118 6119 def _parse_column_def( 6120 self, this: t.Optional[exp.Expression], computed_column: bool = True 6121 ) -> t.Optional[exp.Expression]: 6122 # column defs are not really columns, they're identifiers 6123 if isinstance(this, exp.Column): 6124 this = this.this 6125 6126 if not computed_column: 6127 self._match(TokenType.ALIAS) 6128 6129 kind = self._parse_types(schema=True) 6130 6131 if self._match_text_seq("FOR", "ORDINALITY"): 6132 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6133 6134 constraints: t.List[exp.Expression] = [] 6135 6136 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6137 ("ALIAS", "MATERIALIZED") 6138 ): 6139 persisted = self._prev.text.upper() == "MATERIALIZED" 6140 constraint_kind = exp.ComputedColumnConstraint( 6141 this=self._parse_assignment(), 6142 persisted=persisted or self._match_text_seq("PERSISTED"), 6143 data_type=exp.Var(this="AUTO") 6144 if self._match_text_seq("AUTO") 6145 else self._parse_types(), 6146 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6147 ) 6148 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6149 elif ( 6150 kind 6151 and self._match(TokenType.ALIAS, advance=False) 6152 and ( 6153 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6154 or (self._next and self._next.token_type == TokenType.L_PAREN) 6155 ) 6156 ): 6157 self._advance() 6158 constraints.append( 6159 self.expression( 6160 exp.ColumnConstraint, 6161 kind=exp.ComputedColumnConstraint( 6162 this=self._parse_disjunction(), 6163 persisted=self._match_texts(("STORED", "VIRTUAL")) 6164 and self._prev.text.upper() == "STORED", 6165 ), 6166 ) 6167 ) 6168 6169 while True: 6170 constraint = self._parse_column_constraint() 6171 if not constraint: 6172 break 6173 constraints.append(constraint) 6174 6175 if not kind and not constraints: 6176 return this 6177 6178 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6179 6180 def _parse_auto_increment( 6181 self, 6182 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6183 start = None 6184 increment = None 6185 order = None 6186 6187 if self._match(TokenType.L_PAREN, advance=False): 6188 args = self._parse_wrapped_csv(self._parse_bitwise) 6189 start = seq_get(args, 0) 6190 increment = seq_get(args, 1) 6191 elif self._match_text_seq("START"): 6192 start = self._parse_bitwise() 6193 self._match_text_seq("INCREMENT") 6194 increment = self._parse_bitwise() 6195 if self._match_text_seq("ORDER"): 6196 order = True 6197 elif self._match_text_seq("NOORDER"): 6198 order = False 6199 6200 if start and increment: 6201 return exp.GeneratedAsIdentityColumnConstraint( 6202 start=start, increment=increment, this=False, order=order 6203 ) 6204 6205 return exp.AutoIncrementColumnConstraint() 6206 6207 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6208 if not self._match_text_seq("REFRESH"): 6209 self._retreat(self._index - 1) 6210 return None 6211 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6212 6213 def _parse_compress(self) -> exp.CompressColumnConstraint: 6214 if self._match(TokenType.L_PAREN, advance=False): 6215 return self.expression( 6216 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6217 ) 6218 6219 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6220 6221 def _parse_generated_as_identity( 6222 self, 6223 ) -> ( 6224 exp.GeneratedAsIdentityColumnConstraint 6225 | exp.ComputedColumnConstraint 6226 | exp.GeneratedAsRowColumnConstraint 6227 ): 6228 if self._match_text_seq("BY", "DEFAULT"): 6229 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6230 this = self.expression( 6231 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6232 ) 6233 else: 6234 self._match_text_seq("ALWAYS") 6235 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6236 6237 self._match(TokenType.ALIAS) 6238 6239 if self._match_text_seq("ROW"): 6240 start = self._match_text_seq("START") 6241 if not start: 6242 self._match(TokenType.END) 6243 hidden = self._match_text_seq("HIDDEN") 6244 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6245 6246 identity = self._match_text_seq("IDENTITY") 6247 6248 if self._match(TokenType.L_PAREN): 6249 if self._match(TokenType.START_WITH): 6250 this.set("start", self._parse_bitwise()) 6251 if self._match_text_seq("INCREMENT", "BY"): 6252 this.set("increment", self._parse_bitwise()) 6253 if self._match_text_seq("MINVALUE"): 6254 this.set("minvalue", self._parse_bitwise()) 6255 if self._match_text_seq("MAXVALUE"): 6256 this.set("maxvalue", self._parse_bitwise()) 6257 6258 if self._match_text_seq("CYCLE"): 6259 this.set("cycle", True) 6260 elif self._match_text_seq("NO", "CYCLE"): 6261 this.set("cycle", False) 6262 6263 if not identity: 6264 this.set("expression", self._parse_range()) 6265 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6266 args = self._parse_csv(self._parse_bitwise) 6267 this.set("start", seq_get(args, 0)) 6268 this.set("increment", seq_get(args, 1)) 6269 6270 self._match_r_paren() 6271 6272 return this 6273 6274 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6275 self._match_text_seq("LENGTH") 6276 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6277 6278 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6279 if self._match_text_seq("NULL"): 6280 return self.expression(exp.NotNullColumnConstraint) 6281 if self._match_text_seq("CASESPECIFIC"): 6282 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6283 if self._match_text_seq("FOR", "REPLICATION"): 6284 return self.expression(exp.NotForReplicationColumnConstraint) 6285 6286 # Unconsume the `NOT` token 6287 self._retreat(self._index - 1) 6288 return None 6289 6290 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6291 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6292 6293 procedure_option_follows = ( 6294 self._match(TokenType.WITH, advance=False) 6295 and self._next 6296 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6297 ) 6298 6299 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6300 return self.expression( 6301 exp.ColumnConstraint, 6302 this=this, 6303 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6304 ) 6305 6306 return this 6307 6308 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6309 if not 
self._match(TokenType.CONSTRAINT): 6310 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6311 6312 return self.expression( 6313 exp.Constraint, 6314 this=self._parse_id_var(), 6315 expressions=self._parse_unnamed_constraints(), 6316 ) 6317 6318 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6319 constraints = [] 6320 while True: 6321 constraint = self._parse_unnamed_constraint() or self._parse_function() 6322 if not constraint: 6323 break 6324 constraints.append(constraint) 6325 6326 return constraints 6327 6328 def _parse_unnamed_constraint( 6329 self, constraints: t.Optional[t.Collection[str]] = None 6330 ) -> t.Optional[exp.Expression]: 6331 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6332 constraints or self.CONSTRAINT_PARSERS 6333 ): 6334 return None 6335 6336 constraint = self._prev.text.upper() 6337 if constraint not in self.CONSTRAINT_PARSERS: 6338 self.raise_error(f"No parser found for schema constraint {constraint}.") 6339 6340 return self.CONSTRAINT_PARSERS[constraint](self) 6341 6342 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6343 return self._parse_id_var(any_token=False) 6344 6345 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6346 self._match_texts(("KEY", "INDEX")) 6347 return self.expression( 6348 exp.UniqueColumnConstraint, 6349 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6350 this=self._parse_schema(self._parse_unique_key()), 6351 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6352 on_conflict=self._parse_on_conflict(), 6353 options=self._parse_key_constraint_options(), 6354 ) 6355 6356 def _parse_key_constraint_options(self) -> t.List[str]: 6357 options = [] 6358 while True: 6359 if not self._curr: 6360 break 6361 6362 if self._match(TokenType.ON): 6363 action = None 6364 on = self._advance_any() and self._prev.text 6365 6366 if self._match_text_seq("NO", "ACTION"): 6367 action = "NO ACTION" 6368 elif self._match_text_seq("CASCADE"): 6369 action = "CASCADE" 6370 elif self._match_text_seq("RESTRICT"): 6371 action = "RESTRICT" 6372 elif self._match_pair(TokenType.SET, TokenType.NULL): 6373 action = "SET NULL" 6374 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6375 action = "SET DEFAULT" 6376 else: 6377 self.raise_error("Invalid key constraint") 6378 6379 options.append(f"ON {on} {action}") 6380 else: 6381 var = self._parse_var_from_options( 6382 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6383 ) 6384 if not var: 6385 break 6386 options.append(var.name) 6387 6388 return options 6389 6390 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6391 if match and not self._match(TokenType.REFERENCES): 6392 return None 6393 6394 expressions = None 6395 this = self._parse_table(schema=True) 6396 options = self._parse_key_constraint_options() 6397 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6398 6399 def _parse_foreign_key(self) -> exp.ForeignKey: 6400 expressions = ( 6401 self._parse_wrapped_id_vars() 6402 if not self._match(TokenType.REFERENCES, advance=False) 6403 else None 6404 ) 6405 reference = self._parse_references() 6406 on_options = {} 6407 6408 while self._match(TokenType.ON): 6409 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6410 self.raise_error("Expected DELETE or UPDATE") 6411 6412 kind = self._prev.text.lower() 6413 6414 if self._match_text_seq("NO", "ACTION"): 6415 action = "NO ACTION" 6416 elif 
self._match(TokenType.SET): 6417 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6418 action = "SET " + self._prev.text.upper() 6419 else: 6420 self._advance() 6421 action = self._prev.text.upper() 6422 6423 on_options[kind] = action 6424 6425 return self.expression( 6426 exp.ForeignKey, 6427 expressions=expressions, 6428 reference=reference, 6429 options=self._parse_key_constraint_options(), 6430 **on_options, # type: ignore 6431 ) 6432 6433 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6434 return self._parse_ordered() or self._parse_field() 6435 6436 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6437 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6438 self._retreat(self._index - 1) 6439 return None 6440 6441 id_vars = self._parse_wrapped_id_vars() 6442 return self.expression( 6443 exp.PeriodForSystemTimeConstraint, 6444 this=seq_get(id_vars, 0), 6445 expression=seq_get(id_vars, 1), 6446 ) 6447 6448 def _parse_primary_key( 6449 self, wrapped_optional: bool = False, in_props: bool = False 6450 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6451 desc = ( 6452 self._match_set((TokenType.ASC, TokenType.DESC)) 6453 and self._prev.token_type == TokenType.DESC 6454 ) 6455 6456 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6457 return self.expression( 6458 exp.PrimaryKeyColumnConstraint, 6459 desc=desc, 6460 options=self._parse_key_constraint_options(), 6461 ) 6462 6463 expressions = self._parse_wrapped_csv( 6464 self._parse_primary_key_part, optional=wrapped_optional 6465 ) 6466 6467 return self.expression( 6468 exp.PrimaryKey, 6469 expressions=expressions, 6470 include=self._parse_index_params(), 6471 options=self._parse_key_constraint_options(), 6472 ) 6473 6474 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6475 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6476 6477 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6478 """ 6479 Parses a datetime column in ODBC format. We parse the column into the corresponding 6480 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6481 same as we did for `DATE('yyyy-mm-dd')`. 
6482 6483 Reference: 6484 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6485 """ 6486 self._match(TokenType.VAR) 6487 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6488 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6489 if not self._match(TokenType.R_BRACE): 6490 self.raise_error("Expected }") 6491 return expression 6492 6493 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6494 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6495 return this 6496 6497 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6498 map_token = seq_get(self._tokens, self._index - 2) 6499 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6500 else: 6501 parse_map = False 6502 6503 bracket_kind = self._prev.token_type 6504 if ( 6505 bracket_kind == TokenType.L_BRACE 6506 and self._curr 6507 and self._curr.token_type == TokenType.VAR 6508 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6509 ): 6510 return self._parse_odbc_datetime_literal() 6511 6512 expressions = self._parse_csv( 6513 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6514 ) 6515 6516 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6517 self.raise_error("Expected ]") 6518 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6519 self.raise_error("Expected }") 6520 6521 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6522 if bracket_kind == TokenType.L_BRACE: 6523 this = self.expression( 6524 exp.Struct, 6525 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6526 ) 6527 elif not this: 6528 this = build_array_constructor( 6529 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6530 ) 6531 else: 6532 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6533 if constructor_type: 6534 return build_array_constructor( 6535 constructor_type, 6536 args=expressions, 6537 bracket_kind=bracket_kind, 6538 dialect=self.dialect, 6539 ) 6540 6541 expressions = apply_index_offset( 6542 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6543 ) 6544 this = self.expression( 6545 exp.Bracket, 6546 this=this, 6547 expressions=expressions, 6548 comments=this.pop_comments(), 6549 ) 6550 6551 self._add_comments(this) 6552 return self._parse_bracket(this) 6553 6554 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6555 if self._match(TokenType.COLON): 6556 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6557 return this 6558 6559 def _parse_case(self) -> t.Optional[exp.Expression]: 6560 if self._match(TokenType.DOT, advance=False): 6561 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6562 self._retreat(self._index - 1) 6563 return None 6564 6565 ifs = [] 6566 default = None 6567 6568 comments = self._prev_comments 6569 expression = self._parse_assignment() 6570 6571 while self._match(TokenType.WHEN): 6572 this = self._parse_assignment() 6573 self._match(TokenType.THEN) 6574 then = self._parse_assignment() 6575 ifs.append(self.expression(exp.If, this=this, true=then)) 6576 6577 if self._match(TokenType.ELSE): 6578 default = self._parse_assignment() 6579 6580 if not self._match(TokenType.END): 6581 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6582 default 
= exp.column("interval") 6583 else: 6584 self.raise_error("Expected END after CASE", self._prev) 6585 6586 return self.expression( 6587 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6588 ) 6589 6590 def _parse_if(self) -> t.Optional[exp.Expression]: 6591 if self._match(TokenType.L_PAREN): 6592 args = self._parse_csv( 6593 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6594 ) 6595 this = self.validate_expression(exp.If.from_arg_list(args), args) 6596 self._match_r_paren() 6597 else: 6598 index = self._index - 1 6599 6600 if self.NO_PAREN_IF_COMMANDS and index == 0: 6601 return self._parse_as_command(self._prev) 6602 6603 condition = self._parse_assignment() 6604 6605 if not condition: 6606 self._retreat(index) 6607 return None 6608 6609 self._match(TokenType.THEN) 6610 true = self._parse_assignment() 6611 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6612 self._match(TokenType.END) 6613 this = self.expression(exp.If, this=condition, true=true, false=false) 6614 6615 return this 6616 6617 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6618 if not self._match_text_seq("VALUE", "FOR"): 6619 self._retreat(self._index - 1) 6620 return None 6621 6622 return self.expression( 6623 exp.NextValueFor, 6624 this=self._parse_column(), 6625 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6626 ) 6627 6628 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6629 this = self._parse_function() or self._parse_var_or_string(upper=True) 6630 6631 if self._match(TokenType.FROM): 6632 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6633 6634 if not self._match(TokenType.COMMA): 6635 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6636 6637 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6638 6639 def _parse_gap_fill(self) -> exp.GapFill: 6640 self._match(TokenType.TABLE) 6641 this = self._parse_table() 6642 6643 self._match(TokenType.COMMA) 6644 args = [this, *self._parse_csv(self._parse_lambda)] 6645 6646 gap_fill = exp.GapFill.from_arg_list(args) 6647 return self.validate_expression(gap_fill, args) 6648 6649 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6650 this = self._parse_assignment() 6651 6652 if not self._match(TokenType.ALIAS): 6653 if self._match(TokenType.COMMA): 6654 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6655 6656 self.raise_error("Expected AS after CAST") 6657 6658 fmt = None 6659 to = self._parse_types() 6660 6661 default = self._match(TokenType.DEFAULT) 6662 if default: 6663 default = self._parse_bitwise() 6664 self._match_text_seq("ON", "CONVERSION", "ERROR") 6665 6666 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6667 fmt_string = self._parse_string() 6668 fmt = self._parse_at_time_zone(fmt_string) 6669 6670 if not to: 6671 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6672 if to.this in exp.DataType.TEMPORAL_TYPES: 6673 this = self.expression( 6674 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6675 this=this, 6676 format=exp.Literal.string( 6677 format_time( 6678 fmt_string.this if fmt_string else "", 6679 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6680 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6681 ) 6682 ), 6683 safe=safe, 6684 ) 6685 6686 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6687 this.set("zone", 
fmt.args["zone"]) 6688 return this 6689 elif not to: 6690 self.raise_error("Expected TYPE after CAST") 6691 elif isinstance(to, exp.Identifier): 6692 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6693 elif to.this == exp.DataType.Type.CHAR: 6694 if self._match(TokenType.CHARACTER_SET): 6695 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6696 6697 return self.build_cast( 6698 strict=strict, 6699 this=this, 6700 to=to, 6701 format=fmt, 6702 safe=safe, 6703 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6704 default=default, 6705 ) 6706 6707 def _parse_string_agg(self) -> exp.GroupConcat: 6708 if self._match(TokenType.DISTINCT): 6709 args: t.List[t.Optional[exp.Expression]] = [ 6710 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6711 ] 6712 if self._match(TokenType.COMMA): 6713 args.extend(self._parse_csv(self._parse_assignment)) 6714 else: 6715 args = self._parse_csv(self._parse_assignment) # type: ignore 6716 6717 if self._match_text_seq("ON", "OVERFLOW"): 6718 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6719 if self._match_text_seq("ERROR"): 6720 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6721 else: 6722 self._match_text_seq("TRUNCATE") 6723 on_overflow = self.expression( 6724 exp.OverflowTruncateBehavior, 6725 this=self._parse_string(), 6726 with_count=( 6727 self._match_text_seq("WITH", "COUNT") 6728 or not self._match_text_seq("WITHOUT", "COUNT") 6729 ), 6730 ) 6731 else: 6732 on_overflow = None 6733 6734 index = self._index 6735 if not self._match(TokenType.R_PAREN) and args: 6736 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6737 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6738 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6739 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6740 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6741 6742 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6743 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6744 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6745 if not self._match_text_seq("WITHIN", "GROUP"): 6746 self._retreat(index) 6747 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6748 6749 # The corresponding match_r_paren will be called in parse_function (caller) 6750 self._match_l_paren() 6751 6752 return self.expression( 6753 exp.GroupConcat, 6754 this=self._parse_order(this=seq_get(args, 0)), 6755 separator=seq_get(args, 1), 6756 on_overflow=on_overflow, 6757 ) 6758 6759 def _parse_convert( 6760 self, strict: bool, safe: t.Optional[bool] = None 6761 ) -> t.Optional[exp.Expression]: 6762 this = self._parse_bitwise() 6763 6764 if self._match(TokenType.USING): 6765 to: t.Optional[exp.Expression] = self.expression( 6766 exp.CharacterSet, this=self._parse_var() 6767 ) 6768 elif self._match(TokenType.COMMA): 6769 to = self._parse_types() 6770 else: 6771 to = None 6772 6773 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6774 6775 def _parse_xml_table(self) -> exp.XMLTable: 6776 namespaces = None 6777 passing = None 6778 columns = None 6779 6780 if self._match_text_seq("XMLNAMESPACES", "("): 6781 namespaces = self._parse_xml_namespace() 6782 self._match_text_seq(")", ",") 6783 6784 this = self._parse_string() 6785 6786 if self._match_text_seq("PASSING"): 6787 # The BY VALUE keywords are optional and are provided for semantic clarity 6788 self._match_text_seq("BY", "VALUE") 6789 passing = self._parse_csv(self._parse_column) 6790 6791 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6792 6793 if self._match_text_seq("COLUMNS"): 6794 columns = self._parse_csv(self._parse_field_def) 6795 6796 return self.expression( 6797 exp.XMLTable, 6798 this=this, 6799 namespaces=namespaces, 6800 passing=passing, 6801 columns=columns, 6802 by_ref=by_ref, 6803 ) 6804 6805 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6806 namespaces = [] 6807 6808 while True: 6809 if self._match(TokenType.DEFAULT): 6810 uri = self._parse_string() 6811 else: 6812 uri = self._parse_alias(self._parse_string()) 6813 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6814 if not self._match(TokenType.COMMA): 6815 break 6816 6817 return namespaces 6818 6819 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6820 args = self._parse_csv(self._parse_assignment) 6821 6822 if len(args) < 3: 6823 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6824 6825 return self.expression(exp.DecodeCase, expressions=args) 6826 6827 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6828 self._match_text_seq("KEY") 6829 key = self._parse_column() 6830 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6831 self._match_text_seq("VALUE") 6832 value = self._parse_bitwise() 6833 6834 if not key and not value: 6835 return None 6836 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6837 6838 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6839 if not this or not self._match_text_seq("FORMAT", "JSON"): 6840 return this 6841 6842 return self.expression(exp.FormatJson, this=this) 6843 6844 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6845 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6846 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6847 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6848 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6849 else: 6850 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6851 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6852 6853 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6854 6855 if not empty and not error and not null: 6856 return None 6857 6858 return self.expression( 6859 exp.OnCondition, 6860 empty=empty, 6861 error=error, 6862 null=null, 6863 ) 6864 6865 def _parse_on_handling( 6866 self, on: str, *values: str 6867 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6868 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6869 for value in values: 6870 if self._match_text_seq(value, "ON", on): 6871 return f"{value} ON {on}" 6872 6873 index = self._index 6874 if self._match(TokenType.DEFAULT): 6875 default_value = self._parse_bitwise() 6876 if self._match_text_seq("ON", on): 6877 return default_value 6878 6879 self._retreat(index) 6880 6881 return None 6882 6883 @t.overload 6884 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6885 6886 @t.overload 6887 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6888 6889 def _parse_json_object(self, agg=False): 6890 star = self._parse_star() 6891 expressions = ( 6892 [star] 6893 if star 6894 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6895 ) 6896 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6897 6898 unique_keys = None 6899 if self._match_text_seq("WITH", "UNIQUE"): 6900 unique_keys = True 6901 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6902 unique_keys = False 6903 6904 self._match_text_seq("KEYS") 6905 6906 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6907 self._parse_type() 6908 ) 6909 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6910 6911 return self.expression( 6912 exp.JSONObjectAgg if agg else exp.JSONObject, 6913 expressions=expressions, 6914 null_handling=null_handling, 6915 unique_keys=unique_keys, 6916 return_type=return_type, 6917 encoding=encoding, 6918 ) 6919 6920 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6921 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6922 if not self._match_text_seq("NESTED"): 6923 this = self._parse_id_var() 6924 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 6925 kind = self._parse_types(allow_identifiers=False) 6926 nested = None 6927 else: 6928 this = None 6929 ordinality = None 6930 kind = None 6931 nested = True 6932 6933 path = self._match_text_seq("PATH") and self._parse_string() 6934 nested_schema = nested and self._parse_json_schema() 6935 6936 return self.expression( 6937 exp.JSONColumnDef, 6938 this=this, 6939 kind=kind, 6940 path=path, 6941 nested_schema=nested_schema, 6942 ordinality=ordinality, 6943 ) 6944 6945 def _parse_json_schema(self) -> exp.JSONSchema: 6946 self._match_text_seq("COLUMNS") 6947 return self.expression( 6948 exp.JSONSchema, 6949 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6950 ) 6951 6952 def _parse_json_table(self) -> exp.JSONTable: 6953 this = self._parse_format_json(self._parse_bitwise()) 6954 path = self._match(TokenType.COMMA) and self._parse_string() 6955 error_handling = 
self._parse_on_handling("ERROR", "ERROR", "NULL") 6956 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6957 schema = self._parse_json_schema() 6958 6959 return exp.JSONTable( 6960 this=this, 6961 schema=schema, 6962 path=path, 6963 error_handling=error_handling, 6964 empty_handling=empty_handling, 6965 ) 6966 6967 def _parse_match_against(self) -> exp.MatchAgainst: 6968 if self._match_text_seq("TABLE"): 6969 # parse SingleStore MATCH(TABLE ...) syntax 6970 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6971 expressions = [] 6972 table = self._parse_table() 6973 if table: 6974 expressions = [table] 6975 else: 6976 expressions = self._parse_csv(self._parse_column) 6977 6978 self._match_text_seq(")", "AGAINST", "(") 6979 6980 this = self._parse_string() 6981 6982 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6983 modifier = "IN NATURAL LANGUAGE MODE" 6984 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6985 modifier = f"{modifier} WITH QUERY EXPANSION" 6986 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6987 modifier = "IN BOOLEAN MODE" 6988 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6989 modifier = "WITH QUERY EXPANSION" 6990 else: 6991 modifier = None 6992 6993 return self.expression( 6994 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6995 ) 6996 6997 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6998 def _parse_open_json(self) -> exp.OpenJSON: 6999 this = self._parse_bitwise() 7000 path = self._match(TokenType.COMMA) and self._parse_string() 7001 7002 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7003 this = self._parse_field(any_token=True) 7004 kind = self._parse_types() 7005 path = self._parse_string() 7006 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7007 7008 return self.expression( 7009 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 7010 ) 7011 7012 expressions = None 7013 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7014 self._match_l_paren() 7015 expressions = self._parse_csv(_parse_open_json_column_def) 7016 7017 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7018 7019 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7020 args = self._parse_csv(self._parse_bitwise) 7021 7022 if self._match(TokenType.IN): 7023 return self.expression( 7024 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7025 ) 7026 7027 if haystack_first: 7028 haystack = seq_get(args, 0) 7029 needle = seq_get(args, 1) 7030 else: 7031 haystack = seq_get(args, 1) 7032 needle = seq_get(args, 0) 7033 7034 return self.expression( 7035 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7036 ) 7037 7038 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7039 args = self._parse_csv(self._parse_table) 7040 return exp.JoinHint(this=func_name.upper(), expressions=args) 7041 7042 def _parse_substring(self) -> exp.Substring: 7043 # Postgres supports the form: substring(string [from int] [for int]) 7044 # (despite being undocumented, the reverse order also works) 7045 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7046 7047 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7048 7049 start, length = None, None 7050 7051 while self._curr: 7052 if self._match(TokenType.FROM): 7053 start = 
self._parse_bitwise() 7054 elif self._match(TokenType.FOR): 7055 if not start: 7056 start = exp.Literal.number(1) 7057 length = self._parse_bitwise() 7058 else: 7059 break 7060 7061 if start: 7062 args.append(start) 7063 if length: 7064 args.append(length) 7065 7066 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7067 7068 def _parse_trim(self) -> exp.Trim: 7069 # https://www.w3resource.com/sql/character-functions/trim.php 7070 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7071 7072 position = None 7073 collation = None 7074 expression = None 7075 7076 if self._match_texts(self.TRIM_TYPES): 7077 position = self._prev.text.upper() 7078 7079 this = self._parse_bitwise() 7080 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7081 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7082 expression = self._parse_bitwise() 7083 7084 if invert_order: 7085 this, expression = expression, this 7086 7087 if self._match(TokenType.COLLATE): 7088 collation = self._parse_bitwise() 7089 7090 return self.expression( 7091 exp.Trim, this=this, position=position, expression=expression, collation=collation 7092 ) 7093 7094 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7095 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7096 7097 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7098 return self._parse_window(self._parse_id_var(), alias=True) 7099 7100 def _parse_respect_or_ignore_nulls( 7101 self, this: t.Optional[exp.Expression] 7102 ) -> t.Optional[exp.Expression]: 7103 if self._match_text_seq("IGNORE", "NULLS"): 7104 return self.expression(exp.IgnoreNulls, this=this) 7105 if self._match_text_seq("RESPECT", "NULLS"): 7106 return self.expression(exp.RespectNulls, this=this) 7107 return this 7108 7109 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7110 if self._match(TokenType.HAVING): 7111 self._match_texts(("MAX", "MIN")) 7112 max = self._prev.text.upper() != "MIN" 7113 return self.expression( 7114 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7115 ) 7116 7117 return this 7118 7119 def _parse_window( 7120 self, this: t.Optional[exp.Expression], alias: bool = False 7121 ) -> t.Optional[exp.Expression]: 7122 func = this 7123 comments = func.comments if isinstance(func, exp.Expression) else None 7124 7125 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7126 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7127 if self._match_text_seq("WITHIN", "GROUP"): 7128 order = self._parse_wrapped(self._parse_order) 7129 this = self.expression(exp.WithinGroup, this=this, expression=order) 7130 7131 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7132 self._match(TokenType.WHERE) 7133 this = self.expression( 7134 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7135 ) 7136 self._match_r_paren() 7137 7138 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7139 # Some dialects choose to implement and some do not. 7140 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7141 7142 # There is some code above in _parse_lambda that handles 7143 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7144 7145 # The below changes handle 7146 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
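# Illustrative example (added note; assumes only the public sqlglot.parse_one
# API) of the two placements, both of which are accepted here:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
#     >>> sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")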
7147 7148 # Oracle allows both formats 7149 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7150 # and Snowflake chose to do the same for familiarity 7151 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7152 if isinstance(this, exp.AggFunc): 7153 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7154 7155 if ignore_respect and ignore_respect is not this: 7156 ignore_respect.replace(ignore_respect.this) 7157 this = self.expression(ignore_respect.__class__, this=this) 7158 7159 this = self._parse_respect_or_ignore_nulls(this) 7160 7161 # bigquery select from window x AS (partition by ...) 7162 if alias: 7163 over = None 7164 self._match(TokenType.ALIAS) 7165 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7166 return this 7167 else: 7168 over = self._prev.text.upper() 7169 7170 if comments and isinstance(func, exp.Expression): 7171 func.pop_comments() 7172 7173 if not self._match(TokenType.L_PAREN): 7174 return self.expression( 7175 exp.Window, 7176 comments=comments, 7177 this=this, 7178 alias=self._parse_id_var(False), 7179 over=over, 7180 ) 7181 7182 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7183 7184 first = self._match(TokenType.FIRST) 7185 if self._match_text_seq("LAST"): 7186 first = False 7187 7188 partition, order = self._parse_partition_and_order() 7189 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7190 7191 if kind: 7192 self._match(TokenType.BETWEEN) 7193 start = self._parse_window_spec() 7194 7195 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7196 exclude = ( 7197 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7198 if self._match_text_seq("EXCLUDE") 7199 else None 7200 ) 7201 7202 spec = self.expression( 7203 exp.WindowSpec, 7204 kind=kind, 7205 start=start["value"], 7206 start_side=start["side"], 7207 end=end.get("value"), 7208 end_side=end.get("side"), 7209 exclude=exclude, 7210 ) 7211 else: 7212 spec = None 7213 7214 self._match_r_paren() 7215 7216 window = self.expression( 7217 exp.Window, 7218 comments=comments, 7219 this=this, 7220 partition_by=partition, 7221 order=order, 7222 spec=spec, 7223 alias=window_alias, 7224 over=over, 7225 first=first, 7226 ) 7227 7228 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
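# Illustrative sketch (added note; hedged): for Oracle input such as
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one(
#     ...     "SELECT MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z) FROM t",
#     ...     read="oracle",
#     ... )
#
# the KEEP (...) window is built first, and the recursive call below then wraps
# it in the outer OVER (...) window.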
7229 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7230 return self._parse_window(window, alias=alias) 7231 7232 return window 7233 7234 def _parse_partition_and_order( 7235 self, 7236 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7237 return self._parse_partition_by(), self._parse_order() 7238 7239 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7240 self._match(TokenType.BETWEEN) 7241 7242 return { 7243 "value": ( 7244 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7245 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7246 or self._parse_type() 7247 ), 7248 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7249 } 7250 7251 def _parse_alias( 7252 self, this: t.Optional[exp.Expression], explicit: bool = False 7253 ) -> t.Optional[exp.Expression]: 7254 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7255 # so this section tries to parse the clause version and if it fails, it treats the token 7256 # as an identifier (alias) 7257 if self._can_parse_limit_or_offset(): 7258 return this 7259 7260 any_token = self._match(TokenType.ALIAS) 7261 comments = self._prev_comments or [] 7262 7263 if explicit and not any_token: 7264 return this 7265 7266 if self._match(TokenType.L_PAREN): 7267 aliases = self.expression( 7268 exp.Aliases, 7269 comments=comments, 7270 this=this, 7271 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7272 ) 7273 self._match_r_paren(aliases) 7274 return aliases 7275 7276 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7277 self.STRING_ALIASES and self._parse_string_as_identifier() 7278 ) 7279 7280 if alias: 7281 comments.extend(alias.pop_comments()) 7282 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7283 column = this.this 7284 7285 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7286 if not this.comments and column and column.comments: 7287 this.comments = column.pop_comments() 7288 7289 return this 7290 7291 def _parse_id_var( 7292 self, 7293 any_token: bool = True, 7294 tokens: t.Optional[t.Collection[TokenType]] = None, 7295 ) -> t.Optional[exp.Expression]: 7296 expression = self._parse_identifier() 7297 if not expression and ( 7298 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7299 ): 7300 quoted = self._prev.token_type == TokenType.STRING 7301 expression = self._identifier_expression(quoted=quoted) 7302 7303 return expression 7304 7305 def _parse_string(self) -> t.Optional[exp.Expression]: 7306 if self._match_set(self.STRING_PARSERS): 7307 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7308 return self._parse_placeholder() 7309 7310 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7311 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7312 if output: 7313 output.update_positions(self._prev) 7314 return output 7315 7316 def _parse_number(self) -> t.Optional[exp.Expression]: 7317 if self._match_set(self.NUMERIC_PARSERS): 7318 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7319 return self._parse_placeholder() 7320 7321 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7322 if self._match(TokenType.IDENTIFIER): 7323 return self._identifier_expression(quoted=True) 7324 return self._parse_placeholder() 7325 7326 def _parse_var( 7327 self, 7328 any_token: bool = False, 7329 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7330 upper: bool = False, 7331 ) -> t.Optional[exp.Expression]: 7332 if ( 7333 (any_token and self._advance_any()) 7334 or self._match(TokenType.VAR) 7335 or (self._match_set(tokens) if tokens else False) 7336 ): 7337 return self.expression( 7338 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7339 ) 7340 return self._parse_placeholder() 7341 7342 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7343 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7344 self._advance() 7345 return self._prev 7346 return None 7347 7348 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7349 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7350 7351 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7352 return self._parse_primary() or self._parse_var(any_token=True) 7353 7354 def _parse_null(self) -> t.Optional[exp.Expression]: 7355 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7356 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7357 return self._parse_placeholder() 7358 7359 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7360 if self._match(TokenType.TRUE): 7361 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7362 if self._match(TokenType.FALSE): 7363 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7364 return self._parse_placeholder() 7365 7366 def _parse_star(self) -> t.Optional[exp.Expression]: 7367 if self._match(TokenType.STAR): 7368 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7369 return self._parse_placeholder() 7370 7371 def _parse_parameter(self) -> exp.Parameter: 7372 this = self._parse_identifier() or self._parse_primary_or_var() 7373 return self.expression(exp.Parameter, this=this) 7374 7375 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7376 if self._match_set(self.PLACEHOLDER_PARSERS): 7377 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7378 if placeholder: 7379 return placeholder 7380 self._advance(-1) 7381 return None 7382 7383 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7384 if not self._match_texts(keywords): 7385 return None 7386 if self._match(TokenType.L_PAREN, advance=False): 7387 return self._parse_wrapped_csv(self._parse_expression) 7388 7389 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7390 return [expression] if expression else None 7391 7392 def _parse_csv( 7393 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7394 ) -> t.List[exp.Expression]: 7395 parse_result = parse_method() 7396 items = [parse_result] if parse_result is not None else [] 7397 7398 while self._match(sep): 7399 self._add_comments(parse_result) 7400 parse_result = parse_method() 7401 if parse_result is not None: 7402 items.append(parse_result) 7403 7404 return items 7405 7406 def _parse_tokens( 7407 self, parse_method: t.Callable, expressions: t.Dict 7408 ) -> t.Optional[exp.Expression]: 7409 this = parse_method() 7410 7411 while self._match_set(expressions): 7412 this = self.expression( 7413 expressions[self._prev.token_type], 7414 this=this, 7415 comments=self._prev_comments, 7416 expression=parse_method(), 7417 ) 7418 7419 return this 7420 7421 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7422 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7423 7424 def 
_parse_wrapped_csv( 7425 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7426 ) -> t.List[exp.Expression]: 7427 return self._parse_wrapped( 7428 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7429 ) 7430 7431 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7432 wrapped = self._match(TokenType.L_PAREN) 7433 if not wrapped and not optional: 7434 self.raise_error("Expecting (") 7435 parse_result = parse_method() 7436 if wrapped: 7437 self._match_r_paren() 7438 return parse_result 7439 7440 def _parse_expressions(self) -> t.List[exp.Expression]: 7441 return self._parse_csv(self._parse_expression) 7442 7443 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7444 return ( 7445 self._parse_set_operations( 7446 self._parse_alias(self._parse_assignment(), explicit=True) 7447 if alias 7448 else self._parse_assignment() 7449 ) 7450 or self._parse_select() 7451 ) 7452 7453 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7454 return self._parse_query_modifiers( 7455 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7456 ) 7457 7458 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7459 this = None 7460 if self._match_texts(self.TRANSACTION_KIND): 7461 this = self._prev.text 7462 7463 self._match_texts(("TRANSACTION", "WORK")) 7464 7465 modes = [] 7466 while True: 7467 mode = [] 7468 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7469 mode.append(self._prev.text) 7470 7471 if mode: 7472 modes.append(" ".join(mode)) 7473 if not self._match(TokenType.COMMA): 7474 break 7475 7476 return self.expression(exp.Transaction, this=this, modes=modes) 7477 7478 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7479 chain = None 7480 savepoint = None 7481 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7482 7483 self._match_texts(("TRANSACTION", "WORK")) 7484 7485 if self._match_text_seq("TO"): 7486 self._match_text_seq("SAVEPOINT") 7487 savepoint = self._parse_id_var() 7488 7489 if self._match(TokenType.AND): 7490 chain = not self._match_text_seq("NO") 7491 self._match_text_seq("CHAIN") 7492 7493 if is_rollback: 7494 return self.expression(exp.Rollback, savepoint=savepoint) 7495 7496 return self.expression(exp.Commit, chain=chain) 7497 7498 def _parse_refresh(self) -> exp.Refresh: 7499 self._match(TokenType.TABLE) 7500 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7501 7502 def _parse_column_def_with_exists(self): 7503 start = self._index 7504 self._match(TokenType.COLUMN) 7505 7506 exists_column = self._parse_exists(not_=True) 7507 expression = self._parse_field_def() 7508 7509 if not isinstance(expression, exp.ColumnDef): 7510 self._retreat(start) 7511 return None 7512 7513 expression.set("exists", exists_column) 7514 7515 return expression 7516 7517 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7518 if self._prev.text.upper() != "ADD": 7519 return None 7520 7521 expression = self._parse_column_def_with_exists() 7522 if not expression: 7523 return None 7524 7525 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7526 if self._match_texts(("FIRST", "AFTER")): 7527 position = self._prev.text 7528 column_position = self.expression( 7529 exp.ColumnPosition, this=self._parse_column(), position=position 7530 ) 7531 expression.set("position", column_position) 7532 7533 return 
expression 7534 7535 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7536 drop = self._match(TokenType.DROP) and self._parse_drop() 7537 if drop and not isinstance(drop, exp.Command): 7538 drop.set("kind", drop.args.get("kind", "COLUMN")) 7539 return drop 7540 7541 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7542 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7543 return self.expression( 7544 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7545 ) 7546 7547 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7548 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7549 self._match_text_seq("ADD") 7550 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7551 return self.expression( 7552 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7553 ) 7554 7555 column_def = self._parse_add_column() 7556 if isinstance(column_def, exp.ColumnDef): 7557 return column_def 7558 7559 exists = self._parse_exists(not_=True) 7560 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7561 return self.expression( 7562 exp.AddPartition, 7563 exists=exists, 7564 this=self._parse_field(any_token=True), 7565 location=self._match_text_seq("LOCATION", advance=False) 7566 and self._parse_property(), 7567 ) 7568 7569 return None 7570 7571 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7572 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7573 or self._match_text_seq("COLUMNS") 7574 ): 7575 schema = self._parse_schema() 7576 7577 return ( 7578 ensure_list(schema) 7579 if schema 7580 else self._parse_csv(self._parse_column_def_with_exists) 7581 ) 7582 7583 return self._parse_csv(_parse_add_alteration) 7584 7585 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7586 if self._match_texts(self.ALTER_ALTER_PARSERS): 7587 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7588 7589 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7590 # keyword after ALTER we default to parsing this statement 7591 self._match(TokenType.COLUMN) 7592 column = self._parse_field(any_token=True) 7593 7594 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7595 return self.expression(exp.AlterColumn, this=column, drop=True) 7596 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7597 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7598 if self._match(TokenType.COMMENT): 7599 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7600 if self._match_text_seq("DROP", "NOT", "NULL"): 7601 return self.expression( 7602 exp.AlterColumn, 7603 this=column, 7604 drop=True, 7605 allow_null=True, 7606 ) 7607 if self._match_text_seq("SET", "NOT", "NULL"): 7608 return self.expression( 7609 exp.AlterColumn, 7610 this=column, 7611 allow_null=False, 7612 ) 7613 7614 if self._match_text_seq("SET", "VISIBLE"): 7615 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7616 if self._match_text_seq("SET", "INVISIBLE"): 7617 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7618 7619 self._match_text_seq("SET", "DATA") 7620 self._match_text_seq("TYPE") 7621 return self.expression( 7622 exp.AlterColumn, 7623 this=column, 7624 dtype=self._parse_types(), 7625 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7626 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7627 ) 7628 7629 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7630 if self._match_texts(("ALL", "EVEN", "AUTO")): 7631 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7632 7633 self._match_text_seq("KEY", "DISTKEY") 7634 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7635 7636 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7637 if compound: 7638 self._match_text_seq("SORTKEY") 7639 7640 if self._match(TokenType.L_PAREN, advance=False): 7641 return self.expression( 7642 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7643 ) 7644 7645 self._match_texts(("AUTO", "NONE")) 7646 return self.expression( 7647 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7648 ) 7649 7650 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7651 index = self._index - 1 7652 7653 partition_exists = self._parse_exists() 7654 if self._match(TokenType.PARTITION, advance=False): 7655 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7656 7657 self._retreat(index) 7658 return self._parse_csv(self._parse_drop_column) 7659 7660 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7661 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7662 exists = self._parse_exists() 7663 old_column = self._parse_column() 7664 to = self._match_text_seq("TO") 7665 new_column = self._parse_column() 7666 7667 if old_column is None or to is None or new_column is None: 7668 return None 7669 7670 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7671 7672 self._match_text_seq("TO") 7673 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7674 7675 def _parse_alter_table_set(self) -> exp.AlterSet: 7676 alter_set = self.expression(exp.AlterSet) 7677 7678 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7679 "TABLE", "PROPERTIES" 7680 ): 7681 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7682 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7683 alter_set.set("expressions", [self._parse_assignment()]) 7684 elif self._match_texts(("LOGGED", "UNLOGGED")): 7685 alter_set.set("option", exp.var(self._prev.text.upper())) 7686 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7687 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7688 elif self._match_text_seq("LOCATION"): 7689 alter_set.set("location", self._parse_field()) 7690 elif self._match_text_seq("ACCESS", "METHOD"): 7691 alter_set.set("access_method", self._parse_field()) 7692 elif self._match_text_seq("TABLESPACE"): 7693 alter_set.set("tablespace", self._parse_field()) 7694 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7695 alter_set.set("file_format", [self._parse_field()]) 7696 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7697 alter_set.set("file_format", self._parse_wrapped_options()) 7698 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7699 alter_set.set("copy_options", self._parse_wrapped_options()) 7700 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7701 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7702 else: 7703 if self._match_text_seq("SERDE"): 7704 alter_set.set("serde", self._parse_field()) 7705 7706 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7707 alter_set.set("expressions", [properties]) 7708 7709 return alter_set 7710 7711 def _parse_alter_session(self) -> exp.AlterSession: 7712 """Parse ALTER SESSION SET/UNSET statements.""" 7713 if self._match(TokenType.SET): 7714 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7715 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7716 7717 self._match_text_seq("UNSET") 7718 expressions = self._parse_csv( 7719 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7720 ) 7721 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7722 7723 def _parse_alter(self) -> exp.Alter | exp.Command: 7724 start = self._prev 7725 7726 alter_token = self._match_set(self.ALTERABLES) and self._prev 7727 if not alter_token: 7728 return self._parse_as_command(start) 7729 7730 exists = self._parse_exists() 7731 only = self._match_text_seq("ONLY") 7732 7733 if alter_token.token_type == TokenType.SESSION: 7734 this = None 7735 check = None 7736 cluster = None 7737 else: 7738 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 7739 check = self._match_text_seq("WITH", "CHECK") 7740 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7741 7742 if self._next: 7743 self._advance() 7744 7745 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7746 if parser: 7747 actions = ensure_list(parser(self)) 7748 not_valid = self._match_text_seq("NOT", "VALID") 7749 options = self._parse_csv(self._parse_property) 7750 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 7751 7752 if not self._curr and actions: 7753 return self.expression( 7754 exp.Alter, 7755 this=this, 7756 kind=alter_token.text.upper(), 7757 exists=exists, 7758 actions=actions, 7759 only=only, 7760 options=options, 7761 cluster=cluster, 7762 not_valid=not_valid, 7763 check=check, 7764 cascade=cascade, 7765 ) 7766 7767 return self._parse_as_command(start) 7768 7769 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7770 start = self._prev 7771 # https://duckdb.org/docs/sql/statements/analyze 7772 if not self._curr: 7773 return self.expression(exp.Analyze) 7774 7775 options = [] 7776 while self._match_texts(self.ANALYZE_STYLES): 7777 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7778 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7779 else: 7780 options.append(self._prev.text.upper()) 7781 7782 this: t.Optional[exp.Expression] = None 7783 inner_expression: t.Optional[exp.Expression] = None 7784 7785 kind = self._curr and self._curr.text.upper() 7786 7787 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7788 this = self._parse_table_parts() 7789 elif self._match_text_seq("TABLES"): 7790 if self._match_set((TokenType.FROM, TokenType.IN)): 7791 kind = f"{kind} {self._prev.text.upper()}" 7792 this = self._parse_table(schema=True, is_db_reference=True) 7793 elif self._match_text_seq("DATABASE"): 7794 this = self._parse_table(schema=True, is_db_reference=True) 7795 elif self._match_text_seq("CLUSTER"): 7796 this = self._parse_table() 7797 # Try matching inner expr keywords before fallback to parse table. 
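# (Added illustrative note: a statement with no recognized keyword at all, e.g.
# Presto's bare "ANALYZE my_table", falls through to the final else below and is
# expected to yield exp.Analyze with kind=None and this set to the table.)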
7798 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7799 kind = None 7800 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7801 else: 7802 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7803 kind = None 7804 this = self._parse_table_parts() 7805 7806 partition = self._try_parse(self._parse_partition) 7807 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7808 return self._parse_as_command(start) 7809 7810 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7811 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7812 "WITH", "ASYNC", "MODE" 7813 ): 7814 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7815 else: 7816 mode = None 7817 7818 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7819 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7820 7821 properties = self._parse_properties() 7822 return self.expression( 7823 exp.Analyze, 7824 kind=kind, 7825 this=this, 7826 mode=mode, 7827 partition=partition, 7828 properties=properties, 7829 expression=inner_expression, 7830 options=options, 7831 ) 7832 7833 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7834 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7835 this = None 7836 kind = self._prev.text.upper() 7837 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7838 expressions = [] 7839 7840 if not self._match_text_seq("STATISTICS"): 7841 self.raise_error("Expecting token STATISTICS") 7842 7843 if self._match_text_seq("NOSCAN"): 7844 this = "NOSCAN" 7845 elif self._match(TokenType.FOR): 7846 if self._match_text_seq("ALL", "COLUMNS"): 7847 this = "FOR ALL COLUMNS" 7848 if self._match_texts(("COLUMNS",)): 7849 this = "FOR COLUMNS" 7850 expressions = self._parse_csv(self._parse_column_reference) 7851 elif self._match_text_seq("SAMPLE"): 7852 sample = self._parse_number() 7853 expressions = [ 7854 self.expression( 7855 exp.AnalyzeSample, 7856 sample=sample, 7857 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7858 ) 7859 ] 7860 7861 return self.expression( 7862 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7863 ) 7864 7865 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7866 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7867 kind = None 7868 this = None 7869 expression: t.Optional[exp.Expression] = None 7870 if self._match_text_seq("REF", "UPDATE"): 7871 kind = "REF" 7872 this = "UPDATE" 7873 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7874 this = "UPDATE SET DANGLING TO NULL" 7875 elif self._match_text_seq("STRUCTURE"): 7876 kind = "STRUCTURE" 7877 if self._match_text_seq("CASCADE", "FAST"): 7878 this = "CASCADE FAST" 7879 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7880 ("ONLINE", "OFFLINE") 7881 ): 7882 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7883 expression = self._parse_into() 7884 7885 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7886 7887 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7888 this = self._prev.text.upper() 7889 if self._match_text_seq("COLUMNS"): 7890 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7891 return None 7892 7893 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7894 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7895 if self._match_text_seq("STATISTICS"): 7896 return self.expression(exp.AnalyzeDelete, kind=kind) 7897 return None 7898 7899 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7900 if self._match_text_seq("CHAINED", "ROWS"): 7901 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7902 return None 7903 7904 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7905 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7906 this = self._prev.text.upper() 7907 expression: t.Optional[exp.Expression] = None 7908 expressions = [] 7909 update_options = None 7910 7911 if self._match_text_seq("HISTOGRAM", "ON"): 7912 expressions = self._parse_csv(self._parse_column_reference) 7913 with_expressions = [] 7914 while self._match(TokenType.WITH): 7915 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7916 if self._match_texts(("SYNC", "ASYNC")): 7917 if self._match_text_seq("MODE", advance=False): 7918 with_expressions.append(f"{self._prev.text.upper()} MODE") 7919 self._advance() 7920 else: 7921 buckets = self._parse_number() 7922 if self._match_text_seq("BUCKETS"): 7923 with_expressions.append(f"{buckets} BUCKETS") 7924 if with_expressions: 7925 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7926 7927 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7928 TokenType.UPDATE, advance=False 7929 ): 7930 update_options = self._prev.text.upper() 7931 self._advance() 7932 elif self._match_text_seq("USING", "DATA"): 7933 expression = self.expression(exp.UsingData, this=self._parse_string()) 7934 7935 return self.expression( 7936 exp.AnalyzeHistogram, 7937 this=this, 7938 expressions=expressions, 7939 expression=expression, 7940 update_options=update_options, 7941 ) 7942 7943 def _parse_merge(self) -> exp.Merge: 7944 self._match(TokenType.INTO) 7945 target = self._parse_table() 7946 7947 if target and self._match(TokenType.ALIAS, advance=False): 7948 target.set("alias", self._parse_table_alias()) 7949 7950 self._match(TokenType.USING) 7951 using = self._parse_table() 7952 7953 self._match(TokenType.ON) 7954 on = self._parse_assignment() 7955 7956 return self.expression( 7957 exp.Merge, 7958 this=target, 7959 using=using, 7960 on=on, 7961 whens=self._parse_when_matched(), 7962 returning=self._parse_returning(), 7963 ) 7964 7965 def _parse_when_matched(self) -> exp.Whens: 7966 whens = [] 7967 7968 while self._match(TokenType.WHEN): 7969 matched = not self._match(TokenType.NOT) 7970 self._match_text_seq("MATCHED") 7971 source = ( 7972 False 7973 if self._match_text_seq("BY", "TARGET") 7974 else self._match_text_seq("BY", "SOURCE") 7975 ) 7976 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7977 7978 self._match(TokenType.THEN) 7979 7980 if self._match(TokenType.INSERT): 7981 this = self._parse_star() 7982 if this: 7983 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7984 else: 7985 then = self.expression( 7986 exp.Insert, 7987 this=exp.var("ROW") 7988 if self._match_text_seq("ROW") 7989 else self._parse_value(values=False), 7990 expression=self._match_text_seq("VALUES") and self._parse_value(), 7991 ) 7992 elif self._match(TokenType.UPDATE): 7993 expressions = self._parse_star() 7994 if expressions: 7995 then = self.expression(exp.Update, expressions=expressions) 7996 else: 7997 then = self.expression( 7998 exp.Update, 7999 
expressions=self._match(TokenType.SET) 8000 and self._parse_csv(self._parse_equality), 8001 ) 8002 elif self._match(TokenType.DELETE): 8003 then = self.expression(exp.Var, this=self._prev.text) 8004 else: 8005 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8006 8007 whens.append( 8008 self.expression( 8009 exp.When, 8010 matched=matched, 8011 source=source, 8012 condition=condition, 8013 then=then, 8014 ) 8015 ) 8016 return self.expression(exp.Whens, expressions=whens) 8017 8018 def _parse_show(self) -> t.Optional[exp.Expression]: 8019 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8020 if parser: 8021 return parser(self) 8022 return self._parse_as_command(self._prev) 8023 8024 def _parse_set_item_assignment( 8025 self, kind: t.Optional[str] = None 8026 ) -> t.Optional[exp.Expression]: 8027 index = self._index 8028 8029 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8030 return self._parse_set_transaction(global_=kind == "GLOBAL") 8031 8032 left = self._parse_primary() or self._parse_column() 8033 assignment_delimiter = self._match_texts(("=", "TO")) 8034 8035 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8036 self._retreat(index) 8037 return None 8038 8039 right = self._parse_statement() or self._parse_id_var() 8040 if isinstance(right, (exp.Column, exp.Identifier)): 8041 right = exp.var(right.name) 8042 8043 this = self.expression(exp.EQ, this=left, expression=right) 8044 return self.expression(exp.SetItem, this=this, kind=kind) 8045 8046 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8047 self._match_text_seq("TRANSACTION") 8048 characteristics = self._parse_csv( 8049 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8050 ) 8051 return self.expression( 8052 exp.SetItem, 8053 expressions=characteristics, 8054 kind="TRANSACTION", 8055 **{"global": global_}, # type: ignore 8056 ) 8057 8058 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8059 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8060 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8061 8062 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8063 index = self._index 8064 set_ = self.expression( 8065 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8066 ) 8067 8068 if self._curr: 8069 self._retreat(index) 8070 return self._parse_as_command(self._prev) 8071 8072 return set_ 8073 8074 def _parse_var_from_options( 8075 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8076 ) -> t.Optional[exp.Var]: 8077 start = self._curr 8078 if not start: 8079 return None 8080 8081 option = start.text.upper() 8082 continuations = options.get(option) 8083 8084 index = self._index 8085 self._advance() 8086 for keywords in continuations or []: 8087 if isinstance(keywords, str): 8088 keywords = (keywords,) 8089 8090 if self._match_text_seq(*keywords): 8091 option = f"{option} {' '.join(keywords)}" 8092 break 8093 else: 8094 if continuations or continuations is None: 8095 if raise_unmatched: 8096 self.raise_error(f"Unknown option {option}") 8097 8098 self._retreat(index) 8099 return None 8100 8101 return exp.var(option) 8102 8103 def _parse_as_command(self, start: Token) -> exp.Command: 8104 while self._curr: 8105 self._advance() 8106 text = self._find_sql(start, self._prev) 8107 size = len(start.text) 8108 self._warn_unsupported() 8109 return exp.Command(this=text[:size], 
expression=text[size:]) 8110 8111 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8112 settings = [] 8113 8114 self._match_l_paren() 8115 kind = self._parse_id_var() 8116 8117 if self._match(TokenType.L_PAREN): 8118 while True: 8119 key = self._parse_id_var() 8120 value = self._parse_primary() 8121 if not key and value is None: 8122 break 8123 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8124 self._match(TokenType.R_PAREN) 8125 8126 self._match_r_paren() 8127 8128 return self.expression( 8129 exp.DictProperty, 8130 this=this, 8131 kind=kind.this if kind else None, 8132 settings=settings, 8133 ) 8134 8135 def _parse_dict_range(self, this: str) -> exp.DictRange: 8136 self._match_l_paren() 8137 has_min = self._match_text_seq("MIN") 8138 if has_min: 8139 min = self._parse_var() or self._parse_primary() 8140 self._match_text_seq("MAX") 8141 max = self._parse_var() or self._parse_primary() 8142 else: 8143 max = self._parse_var() or self._parse_primary() 8144 min = exp.Literal.number(0) 8145 self._match_r_paren() 8146 return self.expression(exp.DictRange, this=this, min=min, max=max) 8147 8148 def _parse_comprehension( 8149 self, this: t.Optional[exp.Expression] 8150 ) -> t.Optional[exp.Comprehension]: 8151 index = self._index 8152 expression = self._parse_column() 8153 if not self._match(TokenType.IN): 8154 self._retreat(index - 1) 8155 return None 8156 iterator = self._parse_column() 8157 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8158 return self.expression( 8159 exp.Comprehension, 8160 this=this, 8161 expression=expression, 8162 iterator=iterator, 8163 condition=condition, 8164 ) 8165 8166 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8167 if self._match(TokenType.HEREDOC_STRING): 8168 return self.expression(exp.Heredoc, this=self._prev.text) 8169 8170 if not self._match_text_seq("$"): 8171 return None 8172 8173 tags = ["$"] 8174 tag_text = None 8175 8176 if self._is_connected(): 8177 self._advance() 8178 tags.append(self._prev.text.upper()) 8179 else: 8180 self.raise_error("No closing $ found") 8181 8182 if tags[-1] != "$": 8183 if self._is_connected() and self._match_text_seq("$"): 8184 tag_text = tags[-1] 8185 tags.append("$") 8186 else: 8187 self.raise_error("No closing $ found") 8188 8189 heredoc_start = self._curr 8190 8191 while self._curr: 8192 if self._match_text_seq(*tags, advance=False): 8193 this = self._find_sql(heredoc_start, self._prev) 8194 self._advance(len(tags)) 8195 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8196 8197 self._advance() 8198 8199 self.raise_error(f"No closing {''.join(tags)} found") 8200 return None 8201 8202 def _find_parser( 8203 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8204 ) -> t.Optional[t.Callable]: 8205 if not self._curr: 8206 return None 8207 8208 index = self._index 8209 this = [] 8210 while True: 8211 # The current token might be multiple words 8212 curr = self._curr.text.upper() 8213 key = curr.split(" ") 8214 this.append(curr) 8215 8216 self._advance() 8217 result, trie = in_trie(trie, key) 8218 if result == TrieResult.FAILED: 8219 break 8220 8221 if result == TrieResult.EXISTS: 8222 subparser = parsers[" ".join(this)] 8223 return subparser 8224 8225 self._retreat(index) 8226 return None 8227 8228 def _match(self, token_type, advance=True, expression=None): 8229 if not self._curr: 8230 return None 8231 8232 if self._curr.token_type == token_type: 8233 if advance: 8234 self._advance() 8235 self._add_comments(expression) 8236 return 
True 8237 8238 return None 8239 8240 def _match_set(self, types, advance=True): 8241 if not self._curr: 8242 return None 8243 8244 if self._curr.token_type in types: 8245 if advance: 8246 self._advance() 8247 return True 8248 8249 return None 8250 8251 def _match_pair(self, token_type_a, token_type_b, advance=True): 8252 if not self._curr or not self._next: 8253 return None 8254 8255 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8256 if advance: 8257 self._advance(2) 8258 return True 8259 8260 return None 8261 8262 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8263 if not self._match(TokenType.L_PAREN, expression=expression): 8264 self.raise_error("Expecting (") 8265 8266 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8267 if not self._match(TokenType.R_PAREN, expression=expression): 8268 self.raise_error("Expecting )") 8269 8270 def _match_texts(self, texts, advance=True): 8271 if ( 8272 self._curr 8273 and self._curr.token_type != TokenType.STRING 8274 and self._curr.text.upper() in texts 8275 ): 8276 if advance: 8277 self._advance() 8278 return True 8279 return None 8280 8281 def _match_text_seq(self, *texts, advance=True): 8282 index = self._index 8283 for text in texts: 8284 if ( 8285 self._curr 8286 and self._curr.token_type != TokenType.STRING 8287 and self._curr.text.upper() == text 8288 ): 8289 self._advance() 8290 else: 8291 self._retreat(index) 8292 return None 8293 8294 if not advance: 8295 self._retreat(index) 8296 8297 return True 8298 8299 def _replace_lambda( 8300 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8301 ) -> t.Optional[exp.Expression]: 8302 if not node: 8303 return node 8304 8305 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8306 8307 for column in node.find_all(exp.Column): 8308 typ = lambda_types.get(column.parts[0].name) 8309 if typ is not None: 8310 dot_or_id = column.to_dot() if column.table else column.this 8311 8312 if typ: 8313 dot_or_id = self.expression( 8314 exp.Cast, 8315 this=dot_or_id, 8316 to=typ, 8317 ) 8318 8319 parent = column.parent 8320 8321 while isinstance(parent, exp.Dot): 8322 if not isinstance(parent.parent, exp.Dot): 8323 parent.replace(dot_or_id) 8324 break 8325 parent = parent.parent 8326 else: 8327 if column is node: 8328 node = dot_or_id 8329 else: 8330 column.replace(dot_or_id) 8331 return node 8332 8333 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8334 start = self._prev 8335 8336 # Not to be confused with TRUNCATE(number, decimals) function call 8337 if self._match(TokenType.L_PAREN): 8338 self._retreat(self._index - 2) 8339 return self._parse_function() 8340 8341 # Clickhouse supports TRUNCATE DATABASE as well 8342 is_database = self._match(TokenType.DATABASE) 8343 8344 self._match(TokenType.TABLE) 8345 8346 exists = self._parse_exists(not_=False) 8347 8348 expressions = self._parse_csv( 8349 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8350 ) 8351 8352 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8353 8354 if self._match_text_seq("RESTART", "IDENTITY"): 8355 identity = "RESTART" 8356 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8357 identity = "CONTINUE" 8358 else: 8359 identity = None 8360 8361 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8362 option = self._prev.text 8363 else: 8364 option = None 8365 8366 partition = self._parse_partition() 
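# Illustrative example (added note; assumes only the public sqlglot.parse_one API):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE", read="postgres")
#
# is expected to produce exp.TruncateTable with expressions=[t1, t2],
# identity='RESTART' and option='CASCADE'; any leftover tokens instead trigger
# the command fallback below.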
8367 8368 # Fallback case 8369 if self._curr: 8370 return self._parse_as_command(start) 8371 8372 return self.expression( 8373 exp.TruncateTable, 8374 expressions=expressions, 8375 is_database=is_database, 8376 exists=exists, 8377 cluster=cluster, 8378 identity=identity, 8379 option=option, 8380 partition=partition, 8381 ) 8382 8383 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8384 this = self._parse_ordered(self._parse_opclass) 8385 8386 if not self._match(TokenType.WITH): 8387 return this 8388 8389 op = self._parse_var(any_token=True) 8390 8391 return self.expression(exp.WithOperator, this=this, op=op) 8392 8393 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8394 self._match(TokenType.EQ) 8395 self._match(TokenType.L_PAREN) 8396 8397 opts: t.List[t.Optional[exp.Expression]] = [] 8398 option: exp.Expression | None 8399 while self._curr and not self._match(TokenType.R_PAREN): 8400 if self._match_text_seq("FORMAT_NAME", "="): 8401 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8402 option = self._parse_format_name() 8403 else: 8404 option = self._parse_property() 8405 8406 if option is None: 8407 self.raise_error("Unable to parse option") 8408 break 8409 8410 opts.append(option) 8411 8412 return opts 8413 8414 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8415 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8416 8417 options = [] 8418 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8419 option = self._parse_var(any_token=True) 8420 prev = self._prev.text.upper() 8421 8422 # Different dialects might separate options and values by white space, "=" and "AS" 8423 self._match(TokenType.EQ) 8424 self._match(TokenType.ALIAS) 8425 8426 param = self.expression(exp.CopyParameter, this=option) 8427 8428 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8429 TokenType.L_PAREN, advance=False 8430 ): 8431 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8432 param.set("expressions", self._parse_wrapped_options()) 8433 elif prev == "FILE_FORMAT": 8434 # T-SQL's external file format case 8435 param.set("expression", self._parse_field()) 8436 else: 8437 param.set("expression", self._parse_unquoted_field()) 8438 8439 options.append(param) 8440 self._match(sep) 8441 8442 return options 8443 8444 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8445 expr = self.expression(exp.Credentials) 8446 8447 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8448 expr.set("storage", self._parse_field()) 8449 if self._match_text_seq("CREDENTIALS"): 8450 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8451 creds = ( 8452 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8453 ) 8454 expr.set("credentials", creds) 8455 if self._match_text_seq("ENCRYPTION"): 8456 expr.set("encryption", self._parse_wrapped_options()) 8457 if self._match_text_seq("IAM_ROLE"): 8458 expr.set("iam_role", self._parse_field()) 8459 if self._match_text_seq("REGION"): 8460 expr.set("region", self._parse_field()) 8461 8462 return expr 8463 8464 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8465 return self._parse_field() 8466 8467 def _parse_copy(self) -> exp.Copy | exp.Command: 8468 start = self._prev 8469 8470 self._match(TokenType.INTO) 8471 8472 this = ( 8473 self._parse_select(nested=True, parse_subquery_alias=False) 8474 if self._match(TokenType.L_PAREN, advance=False) 8475 else self._parse_table(schema=True) 
8476 ) 8477 8478 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8479 8480 files = self._parse_csv(self._parse_file_location) 8481 if self._match(TokenType.EQ, advance=False): 8482 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8483 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8484 # list via `_parse_wrapped(..)` below. 8485 self._advance(-1) 8486 files = [] 8487 8488 credentials = self._parse_credentials() 8489 8490 self._match_text_seq("WITH") 8491 8492 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8493 8494 # Fallback case 8495 if self._curr: 8496 return self._parse_as_command(start) 8497 8498 return self.expression( 8499 exp.Copy, 8500 this=this, 8501 kind=kind, 8502 credentials=credentials, 8503 files=files, 8504 params=params, 8505 ) 8506 8507 def _parse_normalize(self) -> exp.Normalize: 8508 return self.expression( 8509 exp.Normalize, 8510 this=self._parse_bitwise(), 8511 form=self._match(TokenType.COMMA) and self._parse_var(), 8512 ) 8513 8514 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8515 args = self._parse_csv(lambda: self._parse_lambda()) 8516 8517 this = seq_get(args, 0) 8518 decimals = seq_get(args, 1) 8519 8520 return expr_type( 8521 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8522 ) 8523 8524 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8525 star_token = self._prev 8526 8527 if self._match_text_seq("COLUMNS", "(", advance=False): 8528 this = self._parse_function() 8529 if isinstance(this, exp.Columns): 8530 this.set("unpack", True) 8531 return this 8532 8533 return self.expression( 8534 exp.Star, 8535 **{ # type: ignore 8536 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8537 "replace": self._parse_star_op("REPLACE"), 8538 "rename": self._parse_star_op("RENAME"), 8539 }, 8540 ).update_positions(star_token) 8541 8542 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8543 privilege_parts = [] 8544 8545 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8546 # (end of privilege list) or L_PAREN (start of column list) are met 8547 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8548 privilege_parts.append(self._curr.text.upper()) 8549 self._advance() 8550 8551 this = exp.var(" ".join(privilege_parts)) 8552 expressions = ( 8553 self._parse_wrapped_csv(self._parse_column) 8554 if self._match(TokenType.L_PAREN, advance=False) 8555 else None 8556 ) 8557 8558 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8559 8560 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8561 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8562 principal = self._parse_id_var() 8563 8564 if not principal: 8565 return None 8566 8567 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8568 8569 def _parse_grant_revoke_common( 8570 self, 8571 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8572 privileges = self._parse_csv(self._parse_grant_privilege) 8573 8574 self._match(TokenType.ON) 8575 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8576 8577 # Attempt to parse the securable e.g. 
    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable, e.g. MySQL allows names
        # such as "foo.*" and "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_function_args())

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression
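_parse_grant follows the same defensive pattern: it only returns an exp.Grant when the securable and principals parse cleanly and every token is consumed. A sketch under a recent sqlglot (identifiers are arbitrary):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("GRANT SELECT, INSERT ON TABLE tbl TO ROLE analyst", read="snowflake")
assert isinstance(ast, exp.Grant)

# MySQL securables such as "*.*" are not modeled yet (see the comment above),
# so this statement should fall back to an opaque exp.Command.
fallback = sqlglot.parse_one("GRANT SELECT ON *.* TO 'user'@'host'", read="mysql")
assert isinstance(fallback, exp.Command)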
    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element

            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])
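These helpers back BigQuery's pipe syntax, where each |> stage is materialized as a CTE by _build_pipe_cte. A sketch, assuming a sqlglot version with pipe syntax support (the exact rewritten SQL varies between versions):

import sqlglot

sql = "FROM orders |> WHERE amount > 10 |> AGGREGATE SUM(amount) AS total GROUP BY customer_id"
# Each stage becomes a nested CTE over the previous one, so the result
# also runs on dialects without pipe syntax.
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])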
    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query
    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )

    def _parse_group_concat(self) -> t.Optional[exp.Expression]:
        def concat_exprs(
            node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
        ) -> exp.Expression:
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                concat_exprs = [
                    self.expression(exp.Concat, expressions=node.expressions, safe=True)
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(exp.Concat, expressions=exprs, safe=True)

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # Order By is the last (or only) expression in the list and has consumed the
                # 'expr' before it, so remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat, this=this, separator=separator)
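_parse_group_concat mostly untangles MySQL's GROUP_CONCAT, where a trailing ORDER BY consumes the expression before it and SEPARATOR introduces its own argument. A sketch assuming a recent sqlglot:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR ', ') FROM t",
    read="mysql",
)
node = ast.find(exp.GroupConcat)
assert node is not None
assert node.args["separator"].this == ", "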
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
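The point of this builder is that the raw path string is normalized into a structured exp.JSONPath, which is what makes cross-dialect transpilation of extraction functions possible. A sketch assuming a recent sqlglot:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.user.name') FROM t", read="mysql")
extract = ast.find(exp.JSONExtract)
# dialect.to_json_path turned '$.user.name' into an exp.JSONPath node.
assert isinstance(extract.expression, exp.JSONPath)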
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
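The wrapping matters because % binds tighter than + in the generated SQL; without it, MOD(a + 1, 7) would re-render as a + 1 % 7 and change meaning. A sketch assuming a recent sqlglot, whose default dialect renders exp.Mod with the infix % operator:

import sqlglot

# Expected to print something like: SELECT (a + 1) % 7
print(sqlglot.transpile("SELECT MOD(a + 1, 7)")[0])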
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
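For dialects with distinct array constructors, the bracket_notation flag records which spelling was used so the generator can reproduce it. A sketch assuming a recent sqlglot, using DuckDB as an example of a dialect with bracket array literals:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT [1, 2, 3]", read="duckdb")
arr = ast.find(exp.Array)
assert arr is not None and len(arr.expressions) == 3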
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
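The default_source_tz hook exists for dialects whose two-argument CONVERT_TIMEZONE implies a fixed source timezone (Redshift, for instance, documents UTC). A hedged sketch assuming a recent sqlglot:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT CONVERT_TIMEZONE('Europe/Paris', ts) FROM t", read="redshift")
node = ast.find(exp.ConvertTimezone)
# With only two arguments, the builder fills in the source timezone,
# provided the dialect supplies a default.
print(node.args.get("source_tz"))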
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.LOCK, 536 TokenType.MERGE, 537 TokenType.NATURAL, 538 TokenType.NEXT, 539 TokenType.OFFSET, 540 TokenType.OPERATOR, 541 TokenType.ORDINALITY, 542 TokenType.OVERLAPS, 543 TokenType.OVERWRITE, 544 TokenType.PARTITION, 545 TokenType.PERCENT, 546 TokenType.PIVOT, 547 TokenType.PRAGMA, 548 TokenType.PUT, 549 TokenType.RANGE, 550 TokenType.RECURSIVE, 551 TokenType.REFERENCES, 552 TokenType.REFRESH, 553 TokenType.RENAME, 554 TokenType.REPLACE, 555 TokenType.RIGHT, 556 TokenType.ROLLUP, 557 TokenType.ROW, 558 TokenType.ROWS, 559 TokenType.SEMI, 560 TokenType.SET, 561 TokenType.SETTINGS, 562 TokenType.SHOW, 563 TokenType.TEMPORARY, 564 TokenType.TOP, 565 
TokenType.TRUE, 566 TokenType.TRUNCATE, 567 TokenType.UNIQUE, 568 TokenType.UNNEST, 569 TokenType.UNPIVOT, 570 TokenType.UPDATE, 571 TokenType.USE, 572 TokenType.VOLATILE, 573 TokenType.WINDOW, 574 *ALTERABLES, 575 *CREATABLES, 576 *SUBQUERY_PREDICATES, 577 *TYPE_TOKENS, 578 *NO_PAREN_FUNCTIONS, 579 } 580 ID_VAR_TOKENS.remove(TokenType.UNION) 581 582 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 583 TokenType.ANTI, 584 TokenType.ASOF, 585 TokenType.FULL, 586 TokenType.LEFT, 587 TokenType.LOCK, 588 TokenType.NATURAL, 589 TokenType.RIGHT, 590 TokenType.SEMI, 591 TokenType.WINDOW, 592 } 593 594 ALIAS_TOKENS = ID_VAR_TOKENS 595 596 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 597 598 ARRAY_CONSTRUCTORS = { 599 "ARRAY": exp.Array, 600 "LIST": exp.List, 601 } 602 603 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 604 605 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 606 607 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 608 609 FUNC_TOKENS = { 610 TokenType.COLLATE, 611 TokenType.COMMAND, 612 TokenType.CURRENT_DATE, 613 TokenType.CURRENT_DATETIME, 614 TokenType.CURRENT_SCHEMA, 615 TokenType.CURRENT_TIMESTAMP, 616 TokenType.CURRENT_TIME, 617 TokenType.CURRENT_USER, 618 TokenType.FILTER, 619 TokenType.FIRST, 620 TokenType.FORMAT, 621 TokenType.GET, 622 TokenType.GLOB, 623 TokenType.IDENTIFIER, 624 TokenType.INDEX, 625 TokenType.ISNULL, 626 TokenType.ILIKE, 627 TokenType.INSERT, 628 TokenType.LIKE, 629 TokenType.MERGE, 630 TokenType.NEXT, 631 TokenType.OFFSET, 632 TokenType.PRIMARY_KEY, 633 TokenType.RANGE, 634 TokenType.REPLACE, 635 TokenType.RLIKE, 636 TokenType.ROW, 637 TokenType.UNNEST, 638 TokenType.VAR, 639 TokenType.LEFT, 640 TokenType.RIGHT, 641 TokenType.SEQUENCE, 642 TokenType.DATE, 643 TokenType.DATETIME, 644 TokenType.TABLE, 645 TokenType.TIMESTAMP, 646 TokenType.TIMESTAMPTZ, 647 TokenType.TRUNCATE, 648 TokenType.UTC_DATE, 649 TokenType.UTC_TIME, 650 TokenType.UTC_TIMESTAMP, 651 TokenType.WINDOW, 652 TokenType.XOR, 653 *TYPE_TOKENS, 654 *SUBQUERY_PREDICATES, 655 } 656 657 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.AND: exp.And, 659 } 660 661 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 662 TokenType.COLON_EQ: exp.PropertyEQ, 663 } 664 665 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 666 TokenType.OR: exp.Or, 667 } 668 669 EQUALITY = { 670 TokenType.EQ: exp.EQ, 671 TokenType.NEQ: exp.NEQ, 672 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 673 } 674 675 COMPARISON = { 676 TokenType.GT: exp.GT, 677 TokenType.GTE: exp.GTE, 678 TokenType.LT: exp.LT, 679 TokenType.LTE: exp.LTE, 680 } 681 682 BITWISE = { 683 TokenType.AMP: exp.BitwiseAnd, 684 TokenType.CARET: exp.BitwiseXor, 685 TokenType.PIPE: exp.BitwiseOr, 686 } 687 688 TERM = { 689 TokenType.DASH: exp.Sub, 690 TokenType.PLUS: exp.Add, 691 TokenType.MOD: exp.Mod, 692 TokenType.COLLATE: exp.Collate, 693 } 694 695 FACTOR = { 696 TokenType.DIV: exp.IntDiv, 697 TokenType.LR_ARROW: exp.Distance, 698 TokenType.SLASH: exp.Div, 699 TokenType.STAR: exp.Mul, 700 } 701 702 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 703 704 TIMES = { 705 TokenType.TIME, 706 TokenType.TIMETZ, 707 } 708 709 TIMESTAMPS = { 710 TokenType.TIMESTAMP, 711 TokenType.TIMESTAMPNTZ, 712 TokenType.TIMESTAMPTZ, 713 TokenType.TIMESTAMPLTZ, 714 *TIMES, 715 } 716 717 SET_OPERATIONS = { 718 TokenType.UNION, 719 TokenType.INTERSECT, 720 TokenType.EXCEPT, 721 } 722 723 JOIN_METHODS = { 724 TokenType.ASOF, 725 TokenType.NATURAL, 726 TokenType.POSITIONAL, 727 } 728 729 JOIN_SIDES = { 730 TokenType.LEFT, 731 
TokenType.RIGHT, 732 TokenType.FULL, 733 } 734 735 JOIN_KINDS = { 736 TokenType.ANTI, 737 TokenType.CROSS, 738 TokenType.INNER, 739 TokenType.OUTER, 740 TokenType.SEMI, 741 TokenType.STRAIGHT_JOIN, 742 } 743 744 JOIN_HINTS: t.Set[str] = set() 745 746 LAMBDAS = { 747 TokenType.ARROW: lambda self, expressions: self.expression( 748 exp.Lambda, 749 this=self._replace_lambda( 750 self._parse_assignment(), 751 expressions, 752 ), 753 expressions=expressions, 754 ), 755 TokenType.FARROW: lambda self, expressions: self.expression( 756 exp.Kwarg, 757 this=exp.var(expressions[0].name), 758 expression=self._parse_assignment(), 759 ), 760 } 761 762 COLUMN_OPERATORS = { 763 TokenType.DOT: None, 764 TokenType.DOTCOLON: lambda self, this, to: self.expression( 765 exp.JSONCast, 766 this=this, 767 to=to, 768 ), 769 TokenType.DCOLON: lambda self, this, to: self.build_cast( 770 strict=self.STRICT_CAST, this=this, to=to 771 ), 772 TokenType.ARROW: lambda self, this, path: self.expression( 773 exp.JSONExtract, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.DARROW: lambda self, this, path: self.expression( 779 exp.JSONExtractScalar, 780 this=this, 781 expression=self.dialect.to_json_path(path), 782 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 783 ), 784 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtract, 786 this=this, 787 expression=path, 788 ), 789 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 790 exp.JSONBExtractScalar, 791 this=this, 792 expression=path, 793 ), 794 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 795 exp.JSONBContains, 796 this=this, 797 expression=key, 798 ), 799 } 800 801 CAST_COLUMN_OPERATORS = { 802 TokenType.DOTCOLON, 803 TokenType.DCOLON, 804 } 805 806 EXPRESSION_PARSERS = { 807 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 808 exp.Column: lambda self: self._parse_column(), 809 exp.Condition: lambda self: self._parse_assignment(), 810 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 811 exp.Expression: lambda self: self._parse_expression(), 812 exp.From: lambda self: self._parse_from(joins=True), 813 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 814 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 815 exp.Group: lambda self: self._parse_group(), 816 exp.Having: lambda self: self._parse_having(), 817 exp.Hint: lambda self: self._parse_hint_body(), 818 exp.Identifier: lambda self: self._parse_id_var(), 819 exp.Join: lambda self: self._parse_join(), 820 exp.Lambda: lambda self: self._parse_lambda(), 821 exp.Lateral: lambda self: self._parse_lateral(), 822 exp.Limit: lambda self: self._parse_limit(), 823 exp.Offset: lambda self: self._parse_offset(), 824 exp.Order: lambda self: self._parse_order(), 825 exp.Ordered: lambda self: self._parse_ordered(), 826 exp.Properties: lambda self: self._parse_properties(), 827 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 828 exp.Qualify: lambda self: self._parse_qualify(), 829 exp.Returning: lambda self: self._parse_returning(), 830 exp.Select: lambda self: self._parse_select(), 831 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 832 exp.Table: lambda self: self._parse_table_parts(), 833 exp.TableAlias: lambda self: self._parse_table_alias(), 834 exp.Tuple: lambda self: self._parse_value(values=False), 835 exp.Whens: lambda self: 
self._parse_when_matched(), 836 exp.Where: lambda self: self._parse_where(), 837 exp.Window: lambda self: self._parse_named_window(), 838 exp.With: lambda self: self._parse_with(), 839 "JOIN_TYPE": lambda self: self._parse_join_parts(), 840 } 841 842 STATEMENT_PARSERS = { 843 TokenType.ALTER: lambda self: self._parse_alter(), 844 TokenType.ANALYZE: lambda self: self._parse_analyze(), 845 TokenType.BEGIN: lambda self: self._parse_transaction(), 846 TokenType.CACHE: lambda self: self._parse_cache(), 847 TokenType.COMMENT: lambda self: self._parse_comment(), 848 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 849 TokenType.COPY: lambda self: self._parse_copy(), 850 TokenType.CREATE: lambda self: self._parse_create(), 851 TokenType.DELETE: lambda self: self._parse_delete(), 852 TokenType.DESC: lambda self: self._parse_describe(), 853 TokenType.DESCRIBE: lambda self: self._parse_describe(), 854 TokenType.DROP: lambda self: self._parse_drop(), 855 TokenType.GRANT: lambda self: self._parse_grant(), 856 TokenType.REVOKE: lambda self: self._parse_revoke(), 857 TokenType.INSERT: lambda self: self._parse_insert(), 858 TokenType.KILL: lambda self: self._parse_kill(), 859 TokenType.LOAD: lambda self: self._parse_load(), 860 TokenType.MERGE: lambda self: self._parse_merge(), 861 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 862 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 863 TokenType.REFRESH: lambda self: self._parse_refresh(), 864 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 865 TokenType.SET: lambda self: self._parse_set(), 866 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 867 TokenType.UNCACHE: lambda self: self._parse_uncache(), 868 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 869 TokenType.UPDATE: lambda self: self._parse_update(), 870 TokenType.USE: lambda self: self._parse_use(), 871 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 872 } 873 874 UNARY_PARSERS = { 875 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 876 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 877 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 878 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 879 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 880 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 881 } 882 883 STRING_PARSERS = { 884 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 885 exp.RawString, this=token.text 886 ), 887 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 888 exp.National, this=token.text 889 ), 890 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 891 TokenType.STRING: lambda self, token: self.expression( 892 exp.Literal, this=token.text, is_string=True 893 ), 894 TokenType.UNICODE_STRING: lambda self, token: self.expression( 895 exp.UnicodeString, 896 this=token.text, 897 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 898 ), 899 } 900 901 NUMERIC_PARSERS = { 902 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 903 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 904 TokenType.HEX_STRING: lambda self, token: self.expression( 905 exp.HexString, 906 
this=token.text, 907 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 908 ), 909 TokenType.NUMBER: lambda self, token: self.expression( 910 exp.Literal, this=token.text, is_string=False 911 ), 912 } 913 914 PRIMARY_PARSERS = { 915 **STRING_PARSERS, 916 **NUMERIC_PARSERS, 917 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 918 TokenType.NULL: lambda self, _: self.expression(exp.Null), 919 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 920 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 921 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 922 TokenType.STAR: lambda self, _: self._parse_star_ops(), 923 } 924 925 PLACEHOLDER_PARSERS = { 926 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 927 TokenType.PARAMETER: lambda self: self._parse_parameter(), 928 TokenType.COLON: lambda self: ( 929 self.expression(exp.Placeholder, this=self._prev.text) 930 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 931 else None 932 ), 933 } 934 935 RANGE_PARSERS = { 936 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 937 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 938 TokenType.GLOB: binary_range_parser(exp.Glob), 939 TokenType.ILIKE: binary_range_parser(exp.ILike), 940 TokenType.IN: lambda self, this: self._parse_in(this), 941 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 942 TokenType.IS: lambda self, this: self._parse_is(this), 943 TokenType.LIKE: binary_range_parser(exp.Like), 944 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 945 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 946 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 947 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 948 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 949 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 950 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 951 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 952 } 953 954 PIPE_SYNTAX_TRANSFORM_PARSERS = { 955 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 956 "AS": lambda self, query: self._build_pipe_cte( 957 query, [exp.Star()], self._parse_table_alias() 958 ), 959 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 960 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 961 "ORDER BY": lambda self, query: query.order_by( 962 self._parse_order(), append=False, copy=False 963 ), 964 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 965 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 966 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 967 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 968 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 969 } 970 971 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 972 "ALLOWED_VALUES": lambda self: self.expression( 973 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 974 ), 975 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 976 "AUTO": lambda self: self._parse_auto_property(), 977 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 978 "BACKUP": lambda self: self.expression( 979 exp.BackupProperty, this=self._parse_var(any_token=True) 980 ), 981 
"BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 982 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 983 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 984 "CHECKSUM": lambda self: self._parse_checksum(), 985 "CLUSTER BY": lambda self: self._parse_cluster(), 986 "CLUSTERED": lambda self: self._parse_clustered_by(), 987 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 988 exp.CollateProperty, **kwargs 989 ), 990 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 991 "CONTAINS": lambda self: self._parse_contains_property(), 992 "COPY": lambda self: self._parse_copy_property(), 993 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 994 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 995 "DEFINER": lambda self: self._parse_definer(), 996 "DETERMINISTIC": lambda self: self.expression( 997 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 998 ), 999 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1000 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1001 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 1002 "DISTKEY": lambda self: self._parse_distkey(), 1003 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1004 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1005 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1006 "ENVIRONMENT": lambda self: self.expression( 1007 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1008 ), 1009 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1010 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1011 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1012 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1013 "FREESPACE": lambda self: self._parse_freespace(), 1014 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1015 "HEAP": lambda self: self.expression(exp.HeapProperty), 1016 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1017 "IMMUTABLE": lambda self: self.expression( 1018 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1019 ), 1020 "INHERITS": lambda self: self.expression( 1021 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1022 ), 1023 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1024 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1025 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1026 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1027 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1028 "LIKE": lambda self: self._parse_create_like(), 1029 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1030 "LOCK": lambda self: self._parse_locking(), 1031 "LOCKING": lambda self: self._parse_locking(), 1032 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1033 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1034 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1035 "MODIFIES": lambda self: self._parse_modifies_property(), 1036 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1037 "NO": lambda self: self._parse_no_property(), 1038 
"ON": lambda self: self._parse_on_property(), 1039 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1040 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1041 "PARTITION": lambda self: self._parse_partitioned_of(), 1042 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1043 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1044 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1045 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1046 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1047 "READS": lambda self: self._parse_reads_property(), 1048 "REMOTE": lambda self: self._parse_remote_with_connection(), 1049 "RETURNS": lambda self: self._parse_returns(), 1050 "STRICT": lambda self: self.expression(exp.StrictProperty), 1051 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1052 "ROW": lambda self: self._parse_row(), 1053 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1054 "SAMPLE": lambda self: self.expression( 1055 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1056 ), 1057 "SECURE": lambda self: self.expression(exp.SecureProperty), 1058 "SECURITY": lambda self: self._parse_security(), 1059 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1060 "SETTINGS": lambda self: self._parse_settings_property(), 1061 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1062 "SORTKEY": lambda self: self._parse_sortkey(), 1063 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1064 "STABLE": lambda self: self.expression( 1065 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1066 ), 1067 "STORED": lambda self: self._parse_stored(), 1068 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1069 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1070 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1071 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1072 "TO": lambda self: self._parse_to_table(), 1073 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1074 "TRANSFORM": lambda self: self.expression( 1075 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1076 ), 1077 "TTL": lambda self: self._parse_ttl(), 1078 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1079 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1080 "VOLATILE": lambda self: self._parse_volatile_property(), 1081 "WITH": lambda self: self._parse_with_property(), 1082 } 1083 1084 CONSTRAINT_PARSERS = { 1085 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1086 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1087 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1088 "CHARACTER SET": lambda self: self.expression( 1089 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1090 ), 1091 "CHECK": lambda self: self.expression( 1092 exp.CheckColumnConstraint, 1093 this=self._parse_wrapped(self._parse_assignment), 1094 enforced=self._match_text_seq("ENFORCED"), 1095 ), 1096 "COLLATE": lambda self: self.expression( 1097 exp.CollateColumnConstraint, 1098 this=self._parse_identifier() or self._parse_column(), 1099 ), 1100 "COMMENT": lambda self: self.expression( 1101 exp.CommentColumnConstraint, this=self._parse_string() 1102 ), 1103 "COMPRESS": 
lambda self: self._parse_compress(), 1104 "CLUSTERED": lambda self: self.expression( 1105 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "NONCLUSTERED": lambda self: self.expression( 1108 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1109 ), 1110 "DEFAULT": lambda self: self.expression( 1111 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1112 ), 1113 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1114 "EPHEMERAL": lambda self: self.expression( 1115 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1116 ), 1117 "EXCLUDE": lambda self: self.expression( 1118 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1119 ), 1120 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1121 "FORMAT": lambda self: self.expression( 1122 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1123 ), 1124 "GENERATED": lambda self: self._parse_generated_as_identity(), 1125 "IDENTITY": lambda self: self._parse_auto_increment(), 1126 "INLINE": lambda self: self._parse_inline(), 1127 "LIKE": lambda self: self._parse_create_like(), 1128 "NOT": lambda self: self._parse_not_constraint(), 1129 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1130 "ON": lambda self: ( 1131 self._match(TokenType.UPDATE) 1132 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1133 ) 1134 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1135 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1136 "PERIOD": lambda self: self._parse_period_for_system_time(), 1137 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1138 "REFERENCES": lambda self: self._parse_references(match=False), 1139 "TITLE": lambda self: self.expression( 1140 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1141 ), 1142 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1143 "UNIQUE": lambda self: self._parse_unique(), 1144 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1145 "WITH": lambda self: self.expression( 1146 exp.Properties, expressions=self._parse_wrapped_properties() 1147 ), 1148 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1149 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1150 } 1151 1152 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1153 if not self._match(TokenType.L_PAREN, advance=False): 1154 # Partitioning by bucket or truncate follows the syntax: 1155 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1156 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1157 self._retreat(self._index - 1) 1158 return None 1159 1160 klass = ( 1161 exp.PartitionedByBucket 1162 if self._prev.text.upper() == "BUCKET" 1163 else exp.PartitionByTruncate 1164 ) 1165 1166 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1167 this, expression = seq_get(args, 0), seq_get(args, 1) 1168 1169 if isinstance(this, exp.Literal): 1170 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1171 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1172 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1173 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1174 # 1175 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1176 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1177 this, expression = expression, this 1178 1179 return self.expression(klass, this=this, expression=expression) 1180 1181 ALTER_PARSERS = { 1182 "ADD": lambda self: self._parse_alter_table_add(), 1183 "AS": lambda self: self._parse_select(), 1184 "ALTER": lambda self: self._parse_alter_table_alter(), 1185 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1186 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1187 "DROP": lambda self: self._parse_alter_table_drop(), 1188 "RENAME": lambda self: self._parse_alter_table_rename(), 1189 "SET": lambda self: self._parse_alter_table_set(), 1190 "SWAP": lambda self: self.expression( 1191 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1192 ), 1193 } 1194 1195 ALTER_ALTER_PARSERS = { 1196 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1197 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1198 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1199 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1200 } 1201 1202 SCHEMA_UNNAMED_CONSTRAINTS = { 1203 "CHECK", 1204 "EXCLUDE", 1205 "FOREIGN KEY", 1206 "LIKE", 1207 "PERIOD", 1208 "PRIMARY KEY", 1209 "UNIQUE", 1210 "BUCKET", 1211 "TRUNCATE", 1212 } 1213 1214 NO_PAREN_FUNCTION_PARSERS = { 1215 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1216 "CASE": lambda self: self._parse_case(), 1217 "CONNECT_BY_ROOT": lambda self: self.expression( 1218 exp.ConnectByRoot, this=self._parse_column() 1219 ), 1220 "IF": lambda self: self._parse_if(), 1221 } 1222 1223 INVALID_FUNC_NAME_TOKENS = { 1224 TokenType.IDENTIFIER, 1225 TokenType.STRING, 1226 } 1227 1228 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1229 1230 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1231 1232 FUNCTION_PARSERS = { 1233 **{ 1234 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1235 }, 1236 **{ 1237 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1238 }, 1239 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1240 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1241 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1242 "DECODE": lambda self: self._parse_decode(), 1243 "EXTRACT": lambda self: self._parse_extract(), 1244 "FLOOR": lambda 
self: self._parse_ceil_floor(exp.Floor), 1245 "GAP_FILL": lambda self: self._parse_gap_fill(), 1246 "JSON_OBJECT": lambda self: self._parse_json_object(), 1247 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1248 "JSON_TABLE": lambda self: self._parse_json_table(), 1249 "MATCH": lambda self: self._parse_match_against(), 1250 "NORMALIZE": lambda self: self._parse_normalize(), 1251 "OPENJSON": lambda self: self._parse_open_json(), 1252 "OVERLAY": lambda self: self._parse_overlay(), 1253 "POSITION": lambda self: self._parse_position(), 1254 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1255 "STRING_AGG": lambda self: self._parse_string_agg(), 1256 "SUBSTRING": lambda self: self._parse_substring(), 1257 "TRIM": lambda self: self._parse_trim(), 1258 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1259 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1260 "XMLELEMENT": lambda self: self.expression( 1261 exp.XMLElement, 1262 this=self._match_text_seq("NAME") and self._parse_id_var(), 1263 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1264 ), 1265 "XMLTABLE": lambda self: self._parse_xml_table(), 1266 } 1267 1268 QUERY_MODIFIER_PARSERS = { 1269 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1270 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1271 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1272 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1273 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1274 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1275 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1276 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1277 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1278 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1279 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1280 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1281 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1282 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1283 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1284 TokenType.CLUSTER_BY: lambda self: ( 1285 "cluster", 1286 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1287 ), 1288 TokenType.DISTRIBUTE_BY: lambda self: ( 1289 "distribute", 1290 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1291 ), 1292 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1293 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1294 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1295 } 1296 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1297 1298 SET_PARSERS = { 1299 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1300 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1301 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1302 "TRANSACTION": lambda self: self._parse_set_transaction(), 1303 } 1304 1305 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1306 1307 TYPE_LITERAL_PARSERS = { 1308 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1309 } 1310 1311 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] 
    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
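    # Editor's sketch (illustrative): OPTIONS_TYPE tables map a leading keyword to the
    # keyword sequences that may follow it, and are walked by _parse_var_from_options.
    # For example, the ISOLATION entry above accepts statements such as:
    #
    #   SET TRANSACTION ISOLATION LEVEL READ COMMITTED
    #   SET TRANSACTION READ ONLY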
    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {}

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False
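    # Editor's sketch (illustrative): dialects tune these class-level flags by subclassing.
    # A hypothetical dialect that accepts string aliases and LN-style logarithm defaults
    # might look like:
    #
    #   from sqlglot.dialects.dialect import Dialect
    #
    #   class MyDialect(Dialect):  # hypothetical
    #       class Parser(Parser):
    #           STRING_ALIASES = True
    #           LOG_DEFAULTS_TO_LN = True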
    # Whether or not interval spans are supported, e.g. INTERVAL '1' YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether ALTER statements are allowed to contain PARTITION specifications
    ALTER_TABLE_PARTITIONS = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
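    # Editor's sketch (illustrative): the parser is usually driven through the top-level
    # sqlglot helpers, but it can be exercised directly:
    #
    #   from sqlglot.parser import Parser
    #   from sqlglot.tokens import Tokenizer
    #
    #   parser = Parser()  # default dialect and error level
    #   expressions = parser.parse(Tokenizer().tokenize("SELECT 1"), sql="SELECT 1")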
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
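    # Editor's sketch (illustrative): parse_into backs the top-level `into=` argument,
    # assuming a parser is registered for the target type in EXPRESSION_PARSERS:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   select = sqlglot.parse_one("SELECT a FROM t", into=exp.Select)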
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
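    # Editor's sketch (illustrative): with ErrorLevel.RAISE, the errors recorded by
    # raise_error are aggregated into a single ParseError by check_errors:
    #
    #   import sqlglot
    #   from sqlglot.errors import ErrorLevel, ParseError
    #
    #   try:
    #       sqlglot.parse_one("SELECT foo( FROM bar", error_level=ErrorLevel.RAISE)
    #   except ParseError as e:
    #       print(e.errors)  # structured dicts with line, col, highlight, context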
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
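    # Editor's note (illustrative): statement dispatch is table-driven. A token found in
    # STATEMENT_PARSERS (e.g. CREATE, DROP, INSERT) is routed to its dedicated _parse_*
    # method; tokens in the tokenizer's COMMANDS set fall back to a generic exp.Command
    # node; anything else is parsed as an expression/SELECT with query modifiers applied
    # on top.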
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
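    # Editor's sketch (illustrative):
    #
    #   import sqlglot
    #
    #   drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
    #   assert drop.args["exists"] is True and drop.args["kind"] == "TABLE"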
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

                # Some dialects also support using a table as an alias instead of a SELECT.
                # Here we fall back to this as an alternative.
                if not expression and has_alias:
                    expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
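    # Editor's sketch (illustrative):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   create = sqlglot.parse_one("CREATE TABLE IF NOT EXISTS t (a INT)")
    #   assert isinstance(create, exp.Create) and create.args["kind"] == "TABLE"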
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # Currently only used by Teradata
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break

            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None
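    # Editor's note (illustrative): a generic `key = value` property is normalized so the
    # key becomes exp.Var (or exp.Dot for dotted names) and an identifier-like value becomes
    # exp.Var as well, producing exp.Property(this=key, value=value) nodes that are later
    # collected into a single exp.Properties container by _parse_properties.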
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse T-SQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )
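    # Editor's sketch (illustrative, Doris/StarRocks-style syntax):
    #
    #   DISTRIBUTED BY HASH(a, b) BUCKETS 16
    #
    # parses into exp.DistributedByProperty(kind="HASH", expressions=[a, b], buckets=16),
    # while omitting BUCKETS (or writing BUCKETS AUTO) leaves `buckets` unset.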
    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )
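    # Editor's sketch (illustrative):
    #
    #   import sqlglot
    #
    #   insert = sqlglot.parse_one("INSERT OVERWRITE TABLE t SELECT * FROM s", read="hive")
    #   assert insert.args["overwrite"] is True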
    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
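    # Editor's sketch (illustrative, Hive-style DDL):
    #
    #   ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
    #
    # parses into exp.RowFormatDelimitedProperty(fields=',', lines='\n'), while
    # ROW FORMAT SERDE '...' yields exp.RowFormatSerdeProperty instead.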
serde_properties=serde_properties 3060 ) 3061 3062 self._match_text_seq("DELIMITED") 3063 3064 kwargs = {} 3065 3066 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3067 kwargs["fields"] = self._parse_string() 3068 if self._match_text_seq("ESCAPED", "BY"): 3069 kwargs["escaped"] = self._parse_string() 3070 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3071 kwargs["collection_items"] = self._parse_string() 3072 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3073 kwargs["map_keys"] = self._parse_string() 3074 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3075 kwargs["lines"] = self._parse_string() 3076 if self._match_text_seq("NULL", "DEFINED", "AS"): 3077 kwargs["null"] = self._parse_string() 3078 3079 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3080 3081 def _parse_load(self) -> exp.LoadData | exp.Command: 3082 if self._match_text_seq("DATA"): 3083 local = self._match_text_seq("LOCAL") 3084 self._match_text_seq("INPATH") 3085 inpath = self._parse_string() 3086 overwrite = self._match(TokenType.OVERWRITE) 3087 self._match_pair(TokenType.INTO, TokenType.TABLE) 3088 3089 return self.expression( 3090 exp.LoadData, 3091 this=self._parse_table(schema=True), 3092 local=local, 3093 overwrite=overwrite, 3094 inpath=inpath, 3095 partition=self._parse_partition(), 3096 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3097 serde=self._match_text_seq("SERDE") and self._parse_string(), 3098 ) 3099 return self._parse_as_command(self._prev) 3100 3101 def _parse_delete(self) -> exp.Delete: 3102 # This handles MySQL's "Multiple-Table Syntax" 3103 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3104 tables = None 3105 if not self._match(TokenType.FROM, advance=False): 3106 tables = self._parse_csv(self._parse_table) or None 3107 3108 returning = self._parse_returning() 3109 3110 return self.expression( 3111 exp.Delete, 3112 tables=tables, 3113 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3114 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3115 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3116 where=self._parse_where(), 3117 returning=returning or self._parse_returning(), 3118 limit=self._parse_limit(), 3119 ) 3120 3121 def _parse_update(self) -> exp.Update: 3122 kwargs: t.Dict[str, t.Any] = { 3123 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3124 } 3125 while self._curr: 3126 if self._match(TokenType.SET): 3127 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3128 elif self._match(TokenType.RETURNING, advance=False): 3129 kwargs["returning"] = self._parse_returning() 3130 elif self._match(TokenType.FROM, advance=False): 3131 kwargs["from"] = self._parse_from(joins=True) 3132 elif self._match(TokenType.WHERE, advance=False): 3133 kwargs["where"] = self._parse_where() 3134 elif self._match(TokenType.ORDER_BY, advance=False): 3135 kwargs["order"] = self._parse_order() 3136 elif self._match(TokenType.LIMIT, advance=False): 3137 kwargs["limit"] = self._parse_limit() 3138 else: 3139 break 3140 3141 return self.expression(exp.Update, **kwargs) 3142 3143 def _parse_use(self) -> exp.Use: 3144 return self.expression( 3145 exp.Use, 3146 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3147 this=self._parse_table(schema=False), 3148 ) 3149 3150 def _parse_uncache(self) -> exp.Uncache: 3151 if not self._match(TokenType.TABLE): 3152 self.raise_error("Expecting TABLE after 
UNCACHE") 3153 3154 return self.expression( 3155 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3156 ) 3157 3158 def _parse_cache(self) -> exp.Cache: 3159 lazy = self._match_text_seq("LAZY") 3160 self._match(TokenType.TABLE) 3161 table = self._parse_table(schema=True) 3162 3163 options = [] 3164 if self._match_text_seq("OPTIONS"): 3165 self._match_l_paren() 3166 k = self._parse_string() 3167 self._match(TokenType.EQ) 3168 v = self._parse_string() 3169 options = [k, v] 3170 self._match_r_paren() 3171 3172 self._match(TokenType.ALIAS) 3173 return self.expression( 3174 exp.Cache, 3175 this=table, 3176 lazy=lazy, 3177 options=options, 3178 expression=self._parse_select(nested=True), 3179 ) 3180 3181 def _parse_partition(self) -> t.Optional[exp.Partition]: 3182 if not self._match_texts(self.PARTITION_KEYWORDS): 3183 return None 3184 3185 return self.expression( 3186 exp.Partition, 3187 subpartition=self._prev.text.upper() == "SUBPARTITION", 3188 expressions=self._parse_wrapped_csv(self._parse_assignment), 3189 ) 3190 3191 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3192 def _parse_value_expression() -> t.Optional[exp.Expression]: 3193 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3194 return exp.var(self._prev.text.upper()) 3195 return self._parse_expression() 3196 3197 if self._match(TokenType.L_PAREN): 3198 expressions = self._parse_csv(_parse_value_expression) 3199 self._match_r_paren() 3200 return self.expression(exp.Tuple, expressions=expressions) 3201 3202 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3203 expression = self._parse_expression() 3204 if expression: 3205 return self.expression(exp.Tuple, expressions=[expression]) 3206 return None 3207 3208 def _parse_projections(self) -> t.List[exp.Expression]: 3209 return self._parse_expressions() 3210 3211 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3212 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3213 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3214 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3215 ) 3216 elif self._match(TokenType.FROM): 3217 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3218 # Support parentheses for duckdb FROM-first syntax 3219 select = self._parse_select(from_=from_) 3220 if select: 3221 if not select.args.get("from"): 3222 select.set("from", from_) 3223 this = select 3224 else: 3225 this = exp.select("*").from_(t.cast(exp.From, from_)) 3226 else: 3227 this = ( 3228 self._parse_table(consume_pipe=True) 3229 if table 3230 else self._parse_select(nested=True, parse_set_operation=False) 3231 ) 3232 3233 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3234 # in case a modifier (e.g. 
join) is following 3235 if table and isinstance(this, exp.Values) and this.alias: 3236 alias = this.args["alias"].pop() 3237 this = exp.Table(this=this, alias=alias) 3238 3239 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3240 3241 return this 3242 3243 def _parse_select( 3244 self, 3245 nested: bool = False, 3246 table: bool = False, 3247 parse_subquery_alias: bool = True, 3248 parse_set_operation: bool = True, 3249 consume_pipe: bool = True, 3250 from_: t.Optional[exp.From] = None, 3251 ) -> t.Optional[exp.Expression]: 3252 query = self._parse_select_query( 3253 nested=nested, 3254 table=table, 3255 parse_subquery_alias=parse_subquery_alias, 3256 parse_set_operation=parse_set_operation, 3257 ) 3258 3259 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3260 if not query and from_: 3261 query = exp.select("*").from_(from_) 3262 if isinstance(query, exp.Query): 3263 query = self._parse_pipe_syntax_query(query) 3264 query = query.subquery(copy=False) if query and table else query 3265 3266 return query 3267 3268 def _parse_select_query( 3269 self, 3270 nested: bool = False, 3271 table: bool = False, 3272 parse_subquery_alias: bool = True, 3273 parse_set_operation: bool = True, 3274 ) -> t.Optional[exp.Expression]: 3275 cte = self._parse_with() 3276 3277 if cte: 3278 this = self._parse_statement() 3279 3280 if not this: 3281 self.raise_error("Failed to parse any statement following CTE") 3282 return cte 3283 3284 if "with" in this.arg_types: 3285 this.set("with", cte) 3286 else: 3287 self.raise_error(f"{this.key} does not support CTE") 3288 this = cte 3289 3290 return this 3291 3292 # duckdb supports leading with FROM x 3293 from_ = ( 3294 self._parse_from(consume_pipe=True) 3295 if self._match(TokenType.FROM, advance=False) 3296 else None 3297 ) 3298 3299 if self._match(TokenType.SELECT): 3300 comments = self._prev_comments 3301 3302 hint = self._parse_hint() 3303 3304 if self._next and not self._next.token_type == TokenType.DOT: 3305 all_ = self._match(TokenType.ALL) 3306 distinct = self._match_set(self.DISTINCT_TOKENS) 3307 else: 3308 all_, distinct = None, None 3309 3310 kind = ( 3311 self._match(TokenType.ALIAS) 3312 and self._match_texts(("STRUCT", "VALUE")) 3313 and self._prev.text.upper() 3314 ) 3315 3316 if distinct: 3317 distinct = self.expression( 3318 exp.Distinct, 3319 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3320 ) 3321 3322 if all_ and distinct: 3323 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3324 3325 operation_modifiers = [] 3326 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3327 operation_modifiers.append(exp.var(self._prev.text.upper())) 3328 3329 limit = self._parse_limit(top=True) 3330 projections = self._parse_projections() 3331 3332 this = self.expression( 3333 exp.Select, 3334 kind=kind, 3335 hint=hint, 3336 distinct=distinct, 3337 expressions=projections, 3338 limit=limit, 3339 operation_modifiers=operation_modifiers or None, 3340 ) 3341 this.comments = comments 3342 3343 into = self._parse_into() 3344 if into: 3345 this.set("into", into) 3346 3347 if not from_: 3348 from_ = self._parse_from() 3349 3350 if from_: 3351 this.set("from", from_) 3352 3353 this = self._parse_query_modifiers(this) 3354 elif (table or nested) and self._match(TokenType.L_PAREN): 3355 this = self._parse_wrapped_select(table=table) 3356 3357 # We return early here so that the UNION isn't attached to the subquery by the 3358 # following call to _parse_set_operations, but 
instead becomes the parent node 3359 self._match_r_paren() 3360 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3361 elif self._match(TokenType.VALUES, advance=False): 3362 this = self._parse_derived_table_values() 3363 elif from_: 3364 this = exp.select("*").from_(from_.this, copy=False) 3365 elif self._match(TokenType.SUMMARIZE): 3366 table = self._match(TokenType.TABLE) 3367 this = self._parse_select() or self._parse_string() or self._parse_table() 3368 return self.expression(exp.Summarize, this=this, table=table) 3369 elif self._match(TokenType.DESCRIBE): 3370 this = self._parse_describe() 3371 elif self._match_text_seq("STREAM"): 3372 this = self._parse_function() 3373 if this: 3374 this = self.expression(exp.Stream, this=this) 3375 else: 3376 self._retreat(self._index - 1) 3377 else: 3378 this = None 3379 3380 return self._parse_set_operations(this) if parse_set_operation else this 3381 3382 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3383 self._match_text_seq("SEARCH") 3384 3385 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3386 3387 if not kind: 3388 return None 3389 3390 self._match_text_seq("FIRST", "BY") 3391 3392 return self.expression( 3393 exp.RecursiveWithSearch, 3394 kind=kind, 3395 this=self._parse_id_var(), 3396 expression=self._match_text_seq("SET") and self._parse_id_var(), 3397 using=self._match_text_seq("USING") and self._parse_id_var(), 3398 ) 3399 3400 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3401 if not skip_with_token and not self._match(TokenType.WITH): 3402 return None 3403 3404 comments = self._prev_comments 3405 recursive = self._match(TokenType.RECURSIVE) 3406 3407 last_comments = None 3408 expressions = [] 3409 while True: 3410 cte = self._parse_cte() 3411 if isinstance(cte, exp.CTE): 3412 expressions.append(cte) 3413 if last_comments: 3414 cte.add_comments(last_comments) 3415 3416 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3417 break 3418 else: 3419 self._match(TokenType.WITH) 3420 3421 last_comments = self._prev_comments 3422 3423 return self.expression( 3424 exp.With, 3425 comments=comments, 3426 expressions=expressions, 3427 recursive=recursive, 3428 search=self._parse_recursive_with_search(), 3429 ) 3430 3431 def _parse_cte(self) -> t.Optional[exp.CTE]: 3432 index = self._index 3433 3434 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3435 if not alias or not alias.this: 3436 self.raise_error("Expected CTE to have alias") 3437 3438 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3439 self._retreat(index) 3440 return None 3441 3442 comments = self._prev_comments 3443 3444 if self._match_text_seq("NOT", "MATERIALIZED"): 3445 materialized = False 3446 elif self._match_text_seq("MATERIALIZED"): 3447 materialized = True 3448 else: 3449 materialized = None 3450 3451 cte = self.expression( 3452 exp.CTE, 3453 this=self._parse_wrapped(self._parse_statement), 3454 alias=alias, 3455 materialized=materialized, 3456 comments=comments, 3457 ) 3458 3459 values = cte.this 3460 if isinstance(values, exp.Values): 3461 if values.alias: 3462 cte.set("this", exp.select("*").from_(values)) 3463 else: 3464 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3465 3466 return cte 3467 3468 def _parse_table_alias( 3469 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3470 ) -> t.Optional[exp.TableAlias]: 3471 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3472 # so this section tries to parse the clause version and if it fails, it treats the token 3473 # as an identifier (alias) 3474 if self._can_parse_limit_or_offset(): 3475 return None 3476 3477 any_token = self._match(TokenType.ALIAS) 3478 alias = ( 3479 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3480 or self._parse_string_as_identifier() 3481 ) 3482 3483 index = self._index 3484 if self._match(TokenType.L_PAREN): 3485 columns = self._parse_csv(self._parse_function_parameter) 3486 self._match_r_paren() if columns else self._retreat(index) 3487 else: 3488 columns = None 3489 3490 if not alias and not columns: 3491 return None 3492 3493 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3494 3495 # We bubble up comments from the Identifier to the TableAlias 3496 if isinstance(alias, exp.Identifier): 3497 table_alias.add_comments(alias.pop_comments()) 3498 3499 return table_alias 3500 3501 def _parse_subquery( 3502 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3503 ) -> t.Optional[exp.Subquery]: 3504 if not this: 3505 return None 3506 3507 return self.expression( 3508 exp.Subquery, 3509 this=this, 3510 pivots=self._parse_pivots(), 3511 alias=self._parse_table_alias() if parse_alias else None, 3512 sample=self._parse_table_sample(), 3513 ) 3514 3515 def _implicit_unnests_to_explicit(self, this: E) -> E: 3516 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3517 3518 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3519 for i, join in enumerate(this.args.get("joins") or []): 3520 table = join.this 3521 normalized_table = table.copy() 3522 normalized_table.meta["maybe_column"] = True 3523 normalized_table = _norm(normalized_table, dialect=self.dialect) 3524 3525 if isinstance(table, exp.Table) and not join.args.get("on"): 3526 if normalized_table.parts[0].name in refs: 3527 table_as_column = table.to_column() 3528 unnest = exp.Unnest(expressions=[table_as_column]) 3529 3530 # Table.to_column creates a parent Alias node that we want to convert to 3531 # a TableAlias and attach to the Unnest, so it matches the parser's output 3532 if isinstance(table.args.get("alias"), exp.TableAlias): 3533 table_as_column.replace(table_as_column.this) 3534 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3535 3536 table.replace(unnest) 3537 3538 refs.add(normalized_table.alias_or_name) 3539 3540 return this 3541 3542 @t.overload 3543 def _parse_query_modifiers(self, this: E) -> E: ... 3544 3545 @t.overload 3546 def _parse_query_modifiers(self, this: None) -> None: ... 
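# Attaches trailing modifier clauses (joins, laterals, WHERE, GROUP BY, ORDER BY,
# LIMIT, ...) from QUERY_MODIFIER_PARSERS to the parsed query, raising if the same
# clause appears twice; a LIMIT <offset>, <count> pair is split into separate
# Limit and Offset nodes.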
3547 3548 def _parse_query_modifiers(self, this): 3549 if isinstance(this, self.MODIFIABLES): 3550 for join in self._parse_joins(): 3551 this.append("joins", join) 3552 for lateral in iter(self._parse_lateral, None): 3553 this.append("laterals", lateral) 3554 3555 while True: 3556 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3557 modifier_token = self._curr 3558 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3559 key, expression = parser(self) 3560 3561 if expression: 3562 if this.args.get(key): 3563 self.raise_error( 3564 f"Found multiple '{modifier_token.text.upper()}' clauses", 3565 token=modifier_token, 3566 ) 3567 3568 this.set(key, expression) 3569 if key == "limit": 3570 offset = expression.args.pop("offset", None) 3571 3572 if offset: 3573 offset = exp.Offset(expression=offset) 3574 this.set("offset", offset) 3575 3576 limit_by_expressions = expression.expressions 3577 expression.set("expressions", None) 3578 offset.set("expressions", limit_by_expressions) 3579 continue 3580 break 3581 3582 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3583 this = self._implicit_unnests_to_explicit(this) 3584 3585 return this 3586 3587 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3588 start = self._curr 3589 while self._curr: 3590 self._advance() 3591 3592 end = self._tokens[self._index - 1] 3593 return exp.Hint(expressions=[self._find_sql(start, end)]) 3594 3595 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3596 return self._parse_function_call() 3597 3598 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3599 start_index = self._index 3600 should_fallback_to_string = False 3601 3602 hints = [] 3603 try: 3604 for hint in iter( 3605 lambda: self._parse_csv( 3606 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3607 ), 3608 [], 3609 ): 3610 hints.extend(hint) 3611 except ParseError: 3612 should_fallback_to_string = True 3613 3614 if should_fallback_to_string or self._curr: 3615 self._retreat(start_index) 3616 return self._parse_hint_fallback_to_string() 3617 3618 return self.expression(exp.Hint, expressions=hints) 3619 3620 def _parse_hint(self) -> t.Optional[exp.Hint]: 3621 if self._match(TokenType.HINT) and self._prev_comments: 3622 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3623 3624 return None 3625 3626 def _parse_into(self) -> t.Optional[exp.Into]: 3627 if not self._match(TokenType.INTO): 3628 return None 3629 3630 temp = self._match(TokenType.TEMPORARY) 3631 unlogged = self._match_text_seq("UNLOGGED") 3632 self._match(TokenType.TABLE) 3633 3634 return self.expression( 3635 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3636 ) 3637 3638 def _parse_from( 3639 self, 3640 joins: bool = False, 3641 skip_from_token: bool = False, 3642 consume_pipe: bool = False, 3643 ) -> t.Optional[exp.From]: 3644 if not skip_from_token and not self._match(TokenType.FROM): 3645 return None 3646 3647 return self.expression( 3648 exp.From, 3649 comments=self._prev_comments, 3650 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3651 ) 3652 3653 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3654 return self.expression( 3655 exp.MatchRecognizeMeasure, 3656 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3657 this=self._parse_expression(), 3658 ) 3659 3660 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3661 if not 
self._match(TokenType.MATCH_RECOGNIZE): 3662 return None 3663 3664 self._match_l_paren() 3665 3666 partition = self._parse_partition_by() 3667 order = self._parse_order() 3668 3669 measures = ( 3670 self._parse_csv(self._parse_match_recognize_measure) 3671 if self._match_text_seq("MEASURES") 3672 else None 3673 ) 3674 3675 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3676 rows = exp.var("ONE ROW PER MATCH") 3677 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3678 text = "ALL ROWS PER MATCH" 3679 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3680 text += " SHOW EMPTY MATCHES" 3681 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3682 text += " OMIT EMPTY MATCHES" 3683 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3684 text += " WITH UNMATCHED ROWS" 3685 rows = exp.var(text) 3686 else: 3687 rows = None 3688 3689 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3690 text = "AFTER MATCH SKIP" 3691 if self._match_text_seq("PAST", "LAST", "ROW"): 3692 text += " PAST LAST ROW" 3693 elif self._match_text_seq("TO", "NEXT", "ROW"): 3694 text += " TO NEXT ROW" 3695 elif self._match_text_seq("TO", "FIRST"): 3696 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3697 elif self._match_text_seq("TO", "LAST"): 3698 text += f" TO LAST {self._advance_any().text}" # type: ignore 3699 after = exp.var(text) 3700 else: 3701 after = None 3702 3703 if self._match_text_seq("PATTERN"): 3704 self._match_l_paren() 3705 3706 if not self._curr: 3707 self.raise_error("Expecting )", self._curr) 3708 3709 paren = 1 3710 start = self._curr 3711 3712 while self._curr and paren > 0: 3713 if self._curr.token_type == TokenType.L_PAREN: 3714 paren += 1 3715 if self._curr.token_type == TokenType.R_PAREN: 3716 paren -= 1 3717 3718 end = self._prev 3719 self._advance() 3720 3721 if paren > 0: 3722 self.raise_error("Expecting )", self._curr) 3723 3724 pattern = exp.var(self._find_sql(start, end)) 3725 else: 3726 pattern = None 3727 3728 define = ( 3729 self._parse_csv(self._parse_name_as_expression) 3730 if self._match_text_seq("DEFINE") 3731 else None 3732 ) 3733 3734 self._match_r_paren() 3735 3736 return self.expression( 3737 exp.MatchRecognize, 3738 partition_by=partition, 3739 order=order, 3740 measures=measures, 3741 rows=rows, 3742 after=after, 3743 pattern=pattern, 3744 define=define, 3745 alias=self._parse_table_alias(), 3746 ) 3747 3748 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3749 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3750 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3751 cross_apply = False 3752 3753 if cross_apply is not None: 3754 this = self._parse_select(table=True) 3755 view = None 3756 outer = None 3757 elif self._match(TokenType.LATERAL): 3758 this = self._parse_select(table=True) 3759 view = self._match(TokenType.VIEW) 3760 outer = self._match(TokenType.OUTER) 3761 else: 3762 return None 3763 3764 if not this: 3765 this = ( 3766 self._parse_unnest() 3767 or self._parse_function() 3768 or self._parse_id_var(any_token=False) 3769 ) 3770 3771 while self._match(TokenType.DOT): 3772 this = exp.Dot( 3773 this=this, 3774 expression=self._parse_function() or self._parse_id_var(any_token=False), 3775 ) 3776 3777 ordinality: t.Optional[bool] = None 3778 3779 if view: 3780 table = self._parse_id_var(any_token=False) 3781 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3782 table_alias: t.Optional[exp.TableAlias] = self.expression( 3783 exp.TableAlias, 
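# For LATERAL VIEW (e.g. Hive/Spark), the alias names the generated table and,
# optionally, its columns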
this=table, columns=columns 3784 ) 3785 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3786 # We move the alias from the lateral's child node to the lateral itself 3787 table_alias = this.args["alias"].pop() 3788 else: 3789 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3790 table_alias = self._parse_table_alias() 3791 3792 return self.expression( 3793 exp.Lateral, 3794 this=this, 3795 view=view, 3796 outer=outer, 3797 alias=table_alias, 3798 cross_apply=cross_apply, 3799 ordinality=ordinality, 3800 ) 3801 3802 def _parse_join_parts( 3803 self, 3804 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3805 return ( 3806 self._match_set(self.JOIN_METHODS) and self._prev, 3807 self._match_set(self.JOIN_SIDES) and self._prev, 3808 self._match_set(self.JOIN_KINDS) and self._prev, 3809 ) 3810 3811 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3812 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3813 this = self._parse_column() 3814 if isinstance(this, exp.Column): 3815 return this.this 3816 return this 3817 3818 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3819 3820 def _parse_join( 3821 self, skip_join_token: bool = False, parse_bracket: bool = False 3822 ) -> t.Optional[exp.Join]: 3823 if self._match(TokenType.COMMA): 3824 table = self._try_parse(self._parse_table) 3825 cross_join = self.expression(exp.Join, this=table) if table else None 3826 3827 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3828 cross_join.set("kind", "CROSS") 3829 3830 return cross_join 3831 3832 index = self._index 3833 method, side, kind = self._parse_join_parts() 3834 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3835 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3836 join_comments = self._prev_comments 3837 3838 if not skip_join_token and not join: 3839 self._retreat(index) 3840 kind = None 3841 method = None 3842 side = None 3843 3844 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3845 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3846 3847 if not skip_join_token and not join and not outer_apply and not cross_apply: 3848 return None 3849 3850 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3851 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3852 kwargs["expressions"] = self._parse_csv( 3853 lambda: self._parse_table(parse_bracket=parse_bracket) 3854 ) 3855 3856 if method: 3857 kwargs["method"] = method.text 3858 if side: 3859 kwargs["side"] = side.text 3860 if kind: 3861 kwargs["kind"] = kind.text 3862 if hint: 3863 kwargs["hint"] = hint 3864 3865 if self._match(TokenType.MATCH_CONDITION): 3866 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3867 3868 if self._match(TokenType.ON): 3869 kwargs["on"] = self._parse_assignment() 3870 elif self._match(TokenType.USING): 3871 kwargs["using"] = self._parse_using_identifiers() 3872 elif ( 3873 not method 3874 and not (outer_apply or cross_apply) 3875 and not isinstance(kwargs["this"], exp.Unnest) 3876 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3877 ): 3878 index = self._index 3879 joins: t.Optional[list] = list(self._parse_joins()) 3880 3881 if joins and self._match(TokenType.ON): 3882 kwargs["on"] = self._parse_assignment() 3883 elif joins and self._match(TokenType.USING): 3884 kwargs["using"] = 
self._parse_using_identifiers() 3885 else: 3886 joins = None 3887 self._retreat(index) 3888 3889 kwargs["this"].set("joins", joins if joins else None) 3890 3891 kwargs["pivots"] = self._parse_pivots() 3892 3893 comments = [c for token in (method, side, kind) if token for c in token.comments] 3894 comments = (join_comments or []) + comments 3895 3896 if ( 3897 self.ADD_JOIN_ON_TRUE 3898 and not kwargs.get("on") 3899 and not kwargs.get("using") 3900 and not kwargs.get("method") 3901 and kwargs.get("kind") in (None, "INNER", "OUTER") 3902 ): 3903 kwargs["on"] = exp.true() 3904 3905 return self.expression(exp.Join, comments=comments, **kwargs) 3906 3907 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3908 this = self._parse_assignment() 3909 3910 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3911 return this 3912 3913 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3914 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3915 3916 return this 3917 3918 def _parse_index_params(self) -> exp.IndexParameters: 3919 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3920 3921 if self._match(TokenType.L_PAREN, advance=False): 3922 columns = self._parse_wrapped_csv(self._parse_with_operator) 3923 else: 3924 columns = None 3925 3926 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3927 partition_by = self._parse_partition_by() 3928 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3929 tablespace = ( 3930 self._parse_var(any_token=True) 3931 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3932 else None 3933 ) 3934 where = self._parse_where() 3935 3936 on = self._parse_field() if self._match(TokenType.ON) else None 3937 3938 return self.expression( 3939 exp.IndexParameters, 3940 using=using, 3941 columns=columns, 3942 include=include, 3943 partition_by=partition_by, 3944 where=where, 3945 with_storage=with_storage, 3946 tablespace=tablespace, 3947 on=on, 3948 ) 3949 3950 def _parse_index( 3951 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3952 ) -> t.Optional[exp.Index]: 3953 if index or anonymous: 3954 unique = None 3955 primary = None 3956 amp = None 3957 3958 self._match(TokenType.ON) 3959 self._match(TokenType.TABLE) # hive 3960 table = self._parse_table_parts(schema=True) 3961 else: 3962 unique = self._match(TokenType.UNIQUE) 3963 primary = self._match_text_seq("PRIMARY") 3964 amp = self._match_text_seq("AMP") 3965 3966 if not self._match(TokenType.INDEX): 3967 return None 3968 3969 index = self._parse_id_var() 3970 table = None 3971 3972 params = self._parse_index_params() 3973 3974 return self.expression( 3975 exp.Index, 3976 this=index, 3977 table=table, 3978 unique=unique, 3979 primary=primary, 3980 amp=amp, 3981 params=params, 3982 ) 3983 3984 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3985 hints: t.List[exp.Expression] = [] 3986 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3987 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3988 hints.append( 3989 self.expression( 3990 exp.WithTableHint, 3991 expressions=self._parse_csv( 3992 lambda: self._parse_function() or self._parse_var(any_token=True) 3993 ), 3994 ) 3995 ) 3996 self._match_r_paren() 3997 else: 3998 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3999 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4000 hint = 
exp.IndexTableHint(this=self._prev.text.upper()) 4001 4002 self._match_set((TokenType.INDEX, TokenType.KEY)) 4003 if self._match(TokenType.FOR): 4004 hint.set("target", self._advance_any() and self._prev.text.upper()) 4005 4006 hint.set("expressions", self._parse_wrapped_id_vars()) 4007 hints.append(hint) 4008 4009 return hints or None 4010 4011 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4012 return ( 4013 (not schema and self._parse_function(optional_parens=False)) 4014 or self._parse_id_var(any_token=False) 4015 or self._parse_string_as_identifier() 4016 or self._parse_placeholder() 4017 ) 4018 4019 def _parse_table_parts( 4020 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4021 ) -> exp.Table: 4022 catalog = None 4023 db = None 4024 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4025 4026 while self._match(TokenType.DOT): 4027 if catalog: 4028 # This allows nesting the table in arbitrarily many dot expressions if needed 4029 table = self.expression( 4030 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4031 ) 4032 else: 4033 catalog = db 4034 db = table 4035 # "" used for tsql FROM a..b case 4036 table = self._parse_table_part(schema=schema) or "" 4037 4038 if ( 4039 wildcard 4040 and self._is_connected() 4041 and (isinstance(table, exp.Identifier) or not table) 4042 and self._match(TokenType.STAR) 4043 ): 4044 if isinstance(table, exp.Identifier): 4045 table.args["this"] += "*" 4046 else: 4047 table = exp.Identifier(this="*") 4048 4049 # We bubble up comments from the Identifier to the Table 4050 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4051 4052 if is_db_reference: 4053 catalog = db 4054 db = table 4055 table = None 4056 4057 if not table and not is_db_reference: 4058 self.raise_error(f"Expected table name but got {self._curr}") 4059 if not db and is_db_reference: 4060 self.raise_error(f"Expected database name but got {self._curr}") 4061 4062 table = self.expression( 4063 exp.Table, 4064 comments=comments, 4065 this=table, 4066 db=db, 4067 catalog=catalog, 4068 ) 4069 4070 changes = self._parse_changes() 4071 if changes: 4072 table.set("changes", changes) 4073 4074 at_before = self._parse_historical_data() 4075 if at_before: 4076 table.set("when", at_before) 4077 4078 pivots = self._parse_pivots() 4079 if pivots: 4080 table.set("pivots", pivots) 4081 4082 return table 4083 4084 def _parse_table( 4085 self, 4086 schema: bool = False, 4087 joins: bool = False, 4088 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4089 parse_bracket: bool = False, 4090 is_db_reference: bool = False, 4091 parse_partition: bool = False, 4092 consume_pipe: bool = False, 4093 ) -> t.Optional[exp.Expression]: 4094 lateral = self._parse_lateral() 4095 if lateral: 4096 return lateral 4097 4098 unnest = self._parse_unnest() 4099 if unnest: 4100 return unnest 4101 4102 values = self._parse_derived_table_values() 4103 if values: 4104 return values 4105 4106 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4107 if subquery: 4108 if not subquery.args.get("pivots"): 4109 subquery.set("pivots", self._parse_pivots()) 4110 return subquery 4111 4112 bracket = parse_bracket and self._parse_bracket(None) 4113 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4114 4115 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4116 self._parse_table 4117 ) 4118 rows_from = 
self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4119 4120 only = self._match(TokenType.ONLY) 4121 4122 this = t.cast( 4123 exp.Expression, 4124 bracket 4125 or rows_from 4126 or self._parse_bracket( 4127 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4128 ), 4129 ) 4130 4131 if only: 4132 this.set("only", only) 4133 4134 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4135 self._match_text_seq("*") 4136 4137 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4138 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4139 this.set("partition", self._parse_partition()) 4140 4141 if schema: 4142 return self._parse_schema(this=this) 4143 4144 version = self._parse_version() 4145 4146 if version: 4147 this.set("version", version) 4148 4149 if self.dialect.ALIAS_POST_TABLESAMPLE: 4150 this.set("sample", self._parse_table_sample()) 4151 4152 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4153 if alias: 4154 this.set("alias", alias) 4155 4156 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4157 return self.expression( 4158 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4159 ) 4160 4161 this.set("hints", self._parse_table_hints()) 4162 4163 if not this.args.get("pivots"): 4164 this.set("pivots", self._parse_pivots()) 4165 4166 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4167 this.set("sample", self._parse_table_sample()) 4168 4169 if joins: 4170 for join in self._parse_joins(): 4171 this.append("joins", join) 4172 4173 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4174 this.set("ordinality", True) 4175 this.set("alias", self._parse_table_alias()) 4176 4177 return this 4178 4179 def _parse_version(self) -> t.Optional[exp.Version]: 4180 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4181 this = "TIMESTAMP" 4182 elif self._match(TokenType.VERSION_SNAPSHOT): 4183 this = "VERSION" 4184 else: 4185 return None 4186 4187 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4188 kind = self._prev.text.upper() 4189 start = self._parse_bitwise() 4190 self._match_texts(("TO", "AND")) 4191 end = self._parse_bitwise() 4192 expression: t.Optional[exp.Expression] = self.expression( 4193 exp.Tuple, expressions=[start, end] 4194 ) 4195 elif self._match_text_seq("CONTAINED", "IN"): 4196 kind = "CONTAINED IN" 4197 expression = self.expression( 4198 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4199 ) 4200 elif self._match(TokenType.ALL): 4201 kind = "ALL" 4202 expression = None 4203 else: 4204 self._match_text_seq("AS", "OF") 4205 kind = "AS OF" 4206 expression = self._parse_type() 4207 4208 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4209 4210 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4211 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4212 index = self._index 4213 historical_data = None 4214 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4215 this = self._prev.text.upper() 4216 kind = ( 4217 self._match(TokenType.L_PAREN) 4218 and self._match_texts(self.HISTORICAL_DATA_KIND) 4219 and self._prev.text.upper() 4220 ) 4221 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4222 4223 if expression: 4224 self._match_r_paren() 4225 historical_data = self.expression( 4226 exp.HistoricalData, this=this, kind=kind, expression=expression 4227 ) 4228 else: 4229 
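# No complete AT|BEFORE (<kind> => <value>) clause was found, so backtrack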
self._retreat(index) 4230 4231 return historical_data 4232 4233 def _parse_changes(self) -> t.Optional[exp.Changes]: 4234 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4235 return None 4236 4237 information = self._parse_var(any_token=True) 4238 self._match_r_paren() 4239 4240 return self.expression( 4241 exp.Changes, 4242 information=information, 4243 at_before=self._parse_historical_data(), 4244 end=self._parse_historical_data(), 4245 ) 4246 4247 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4248 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4249 return None 4250 4251 self._advance() 4252 4253 expressions = self._parse_wrapped_csv(self._parse_equality) 4254 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4255 4256 alias = self._parse_table_alias() if with_alias else None 4257 4258 if alias: 4259 if self.dialect.UNNEST_COLUMN_ONLY: 4260 if alias.args.get("columns"): 4261 self.raise_error("Unexpected extra column alias in unnest.") 4262 4263 alias.set("columns", [alias.this]) 4264 alias.set("this", None) 4265 4266 columns = alias.args.get("columns") or [] 4267 if offset and len(expressions) < len(columns): 4268 offset = columns.pop() 4269 4270 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4271 self._match(TokenType.ALIAS) 4272 offset = self._parse_id_var( 4273 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4274 ) or exp.to_identifier("offset") 4275 4276 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4277 4278 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4279 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4280 if not is_derived and not ( 4281 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4282 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4283 ): 4284 return None 4285 4286 expressions = self._parse_csv(self._parse_value) 4287 alias = self._parse_table_alias() 4288 4289 if is_derived: 4290 self._match_r_paren() 4291 4292 return self.expression( 4293 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4294 ) 4295 4296 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4297 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4298 as_modifier and self._match_text_seq("USING", "SAMPLE") 4299 ): 4300 return None 4301 4302 bucket_numerator = None 4303 bucket_denominator = None 4304 bucket_field = None 4305 percent = None 4306 size = None 4307 seed = None 4308 4309 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4310 matched_l_paren = self._match(TokenType.L_PAREN) 4311 4312 if self.TABLESAMPLE_CSV: 4313 num = None 4314 expressions = self._parse_csv(self._parse_primary) 4315 else: 4316 expressions = None 4317 num = ( 4318 self._parse_factor() 4319 if self._match(TokenType.NUMBER, advance=False) 4320 else self._parse_primary() or self._parse_placeholder() 4321 ) 4322 4323 if self._match_text_seq("BUCKET"): 4324 bucket_numerator = self._parse_number() 4325 self._match_text_seq("OUT", "OF") 4326 bucket_denominator = self._parse_number() 4327 self._match(TokenType.ON) 4328 bucket_field = self._parse_field() 4329 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4330 percent = num 4331 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4332 size = num 4333 else: 4334 percent = num 4335 4336 if matched_l_paren: 
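# Only consume a closing paren if an opening one was consumed above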
self._match_r_paren() 4338 4339 if self._match(TokenType.L_PAREN): 4340 method = self._parse_var(upper=True) 4341 seed = self._match(TokenType.COMMA) and self._parse_number() 4342 self._match_r_paren() 4343 elif self._match_texts(("SEED", "REPEATABLE")): 4344 seed = self._parse_wrapped(self._parse_number) 4345 4346 if not method and self.DEFAULT_SAMPLING_METHOD: 4347 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4348 4349 return self.expression( 4350 exp.TableSample, 4351 expressions=expressions, 4352 method=method, 4353 bucket_numerator=bucket_numerator, 4354 bucket_denominator=bucket_denominator, 4355 bucket_field=bucket_field, 4356 percent=percent, 4357 size=size, 4358 seed=seed, 4359 ) 4360 4361 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4362 return list(iter(self._parse_pivot, None)) or None 4363 4364 def _parse_joins(self) -> t.Iterator[exp.Join]: 4365 return iter(self._parse_join, None) 4366 4367 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4368 if not self._match(TokenType.INTO): 4369 return None 4370 4371 return self.expression( 4372 exp.UnpivotColumns, 4373 this=self._match_text_seq("NAME") and self._parse_column(), 4374 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4375 ) 4376 4377 # https://duckdb.org/docs/sql/statements/pivot 4378 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4379 def _parse_on() -> t.Optional[exp.Expression]: 4380 this = self._parse_bitwise() 4381 4382 if self._match(TokenType.IN): 4383 # PIVOT ... ON col IN (row_val1, row_val2) 4384 return self._parse_in(this) 4385 if self._match(TokenType.ALIAS, advance=False): 4386 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4387 return self._parse_alias(this) 4388 4389 return this 4390 4391 this = self._parse_table() 4392 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4393 into = self._parse_unpivot_columns() 4394 using = self._match(TokenType.USING) and self._parse_csv( 4395 lambda: self._parse_alias(self._parse_function()) 4396 ) 4397 group = self._parse_group() 4398 4399 return self.expression( 4400 exp.Pivot, 4401 this=this, 4402 expressions=expressions, 4403 using=using, 4404 group=group, 4405 unpivot=is_unpivot, 4406 into=into, 4407 ) 4408 4409 def _parse_pivot_in(self) -> exp.In: 4410 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4411 this = self._parse_select_or_expression() 4412 4413 self._match(TokenType.ALIAS) 4414 alias = self._parse_bitwise() 4415 if alias: 4416 if isinstance(alias, exp.Column) and not alias.db: 4417 alias = alias.this 4418 return self.expression(exp.PivotAlias, this=this, alias=alias) 4419 4420 return this 4421 4422 value = self._parse_column() 4423 4424 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4425 self.raise_error("Expecting IN (") 4426 4427 if self._match(TokenType.ANY): 4428 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4429 else: 4430 exprs = self._parse_csv(_parse_aliased_expression) 4431 4432 self._match_r_paren() 4433 return self.expression(exp.In, this=value, expressions=exprs) 4434 4435 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4436 func = self._parse_function() 4437 if not func: 4438 if self._prev and self._prev.token_type == TokenType.COMMA: 4439 return None 4440 self.raise_error("Expecting an aggregation function in PIVOT") 4441 4442 return self._parse_alias(func) 4443 4444 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4445 index = 
self._index 4446 include_nulls = None 4447 4448 if self._match(TokenType.PIVOT): 4449 unpivot = False 4450 elif self._match(TokenType.UNPIVOT): 4451 unpivot = True 4452 4453 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4454 if self._match_text_seq("INCLUDE", "NULLS"): 4455 include_nulls = True 4456 elif self._match_text_seq("EXCLUDE", "NULLS"): 4457 include_nulls = False 4458 else: 4459 return None 4460 4461 expressions = [] 4462 4463 if not self._match(TokenType.L_PAREN): 4464 self._retreat(index) 4465 return None 4466 4467 if unpivot: 4468 expressions = self._parse_csv(self._parse_column) 4469 else: 4470 expressions = self._parse_csv(self._parse_pivot_aggregation) 4471 4472 if not expressions: 4473 self.raise_error("Failed to parse PIVOT's aggregation list") 4474 4475 if not self._match(TokenType.FOR): 4476 self.raise_error("Expecting FOR") 4477 4478 fields = [] 4479 while True: 4480 field = self._try_parse(self._parse_pivot_in) 4481 if not field: 4482 break 4483 fields.append(field) 4484 4485 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4486 self._parse_bitwise 4487 ) 4488 4489 group = self._parse_group() 4490 4491 self._match_r_paren() 4492 4493 pivot = self.expression( 4494 exp.Pivot, 4495 expressions=expressions, 4496 fields=fields, 4497 unpivot=unpivot, 4498 include_nulls=include_nulls, 4499 default_on_null=default_on_null, 4500 group=group, 4501 ) 4502 4503 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4504 pivot.set("alias", self._parse_table_alias()) 4505 4506 if not unpivot: 4507 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4508 4509 columns: t.List[exp.Expression] = [] 4510 all_fields = [] 4511 for pivot_field in pivot.fields: 4512 pivot_field_expressions = pivot_field.expressions 4513 4514 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4515 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4516 continue 4517 4518 all_fields.append( 4519 [ 4520 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4521 for fld in pivot_field_expressions 4522 ] 4523 ) 4524 4525 if all_fields: 4526 if names: 4527 all_fields.append(names) 4528 4529 # Generate all possible combinations of the pivot columns 4530 # e.g PIVOT(sum(...) 
as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4531 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4532 for fld_parts_tuple in itertools.product(*all_fields): 4533 fld_parts = list(fld_parts_tuple) 4534 4535 if names and self.PREFIXED_PIVOT_COLUMNS: 4536 # Move the "name" to the front of the list 4537 fld_parts.insert(0, fld_parts.pop(-1)) 4538 4539 columns.append(exp.to_identifier("_".join(fld_parts))) 4540 4541 pivot.set("columns", columns) 4542 4543 return pivot 4544 4545 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4546 return [agg.alias for agg in aggregations if agg.alias] 4547 4548 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4549 if not skip_where_token and not self._match(TokenType.PREWHERE): 4550 return None 4551 4552 return self.expression( 4553 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4554 ) 4555 4556 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4557 if not skip_where_token and not self._match(TokenType.WHERE): 4558 return None 4559 4560 return self.expression( 4561 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4562 ) 4563 4564 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4565 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4566 return None 4567 comments = self._prev_comments 4568 4569 elements: t.Dict[str, t.Any] = defaultdict(list) 4570 4571 if self._match(TokenType.ALL): 4572 elements["all"] = True 4573 elif self._match(TokenType.DISTINCT): 4574 elements["all"] = False 4575 4576 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4577 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4578 4579 while True: 4580 index = self._index 4581 4582 elements["expressions"].extend( 4583 self._parse_csv( 4584 lambda: None 4585 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4586 else self._parse_assignment() 4587 ) 4588 ) 4589 4590 before_with_index = self._index 4591 with_prefix = self._match(TokenType.WITH) 4592 4593 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 4594 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 4595 elements[key].append(cube_or_rollup) 4596 elif grouping_sets := self._parse_grouping_sets(): 4597 elements["grouping_sets"].append(grouping_sets) 4598 elif self._match_text_seq("TOTALS"): 4599 elements["totals"] = True # type: ignore 4600 4601 if before_with_index <= self._index <= before_with_index + 1: 4602 self._retreat(before_with_index) 4603 break 4604 4605 if index == self._index: 4606 break 4607 4608 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4609 4610 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: 4611 if self._match(TokenType.CUBE): 4612 kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube 4613 elif self._match(TokenType.ROLLUP): 4614 kind = exp.Rollup 4615 else: 4616 return None 4617 4618 return self.expression( 4619 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4620 ) 4621 4622 def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]: 4623 if self._match(TokenType.GROUPING_SETS): 4624 return self.expression( 4625 exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set) 4626 ) 4627 return None 4628 4629 def _parse_grouping_set(self) 
-> t.Optional[exp.Expression]: 4630 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 4631 4632 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4633 if not skip_having_token and not self._match(TokenType.HAVING): 4634 return None 4635 return self.expression( 4636 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4637 ) 4638 4639 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4640 if not self._match(TokenType.QUALIFY): 4641 return None 4642 return self.expression(exp.Qualify, this=self._parse_assignment()) 4643 4644 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4645 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4646 exp.Prior, this=self._parse_bitwise() 4647 ) 4648 connect = self._parse_assignment() 4649 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4650 return connect 4651 4652 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4653 if skip_start_token: 4654 start = None 4655 elif self._match(TokenType.START_WITH): 4656 start = self._parse_assignment() 4657 else: 4658 return None 4659 4660 self._match(TokenType.CONNECT_BY) 4661 nocycle = self._match_text_seq("NOCYCLE") 4662 connect = self._parse_connect_with_prior() 4663 4664 if not start and self._match(TokenType.START_WITH): 4665 start = self._parse_assignment() 4666 4667 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4668 4669 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4670 this = self._parse_id_var(any_token=True) 4671 if self._match(TokenType.ALIAS): 4672 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4673 return this 4674 4675 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4676 if self._match_text_seq("INTERPOLATE"): 4677 return self._parse_wrapped_csv(self._parse_name_as_expression) 4678 return None 4679 4680 def _parse_order( 4681 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4682 ) -> t.Optional[exp.Expression]: 4683 siblings = None 4684 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4685 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4686 return this 4687 4688 siblings = True 4689 4690 return self.expression( 4691 exp.Order, 4692 comments=self._prev_comments, 4693 this=this, 4694 expressions=self._parse_csv(self._parse_ordered), 4695 siblings=siblings, 4696 ) 4697 4698 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4699 if not self._match(token): 4700 return None 4701 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4702 4703 def _parse_ordered( 4704 self, parse_method: t.Optional[t.Callable] = None 4705 ) -> t.Optional[exp.Ordered]: 4706 this = parse_method() if parse_method else self._parse_assignment() 4707 if not this: 4708 return None 4709 4710 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4711 this = exp.var("ALL") 4712 4713 asc = self._match(TokenType.ASC) 4714 desc = self._match(TokenType.DESC) or (asc and False) 4715 4716 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4717 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4718 4719 nulls_first = is_nulls_first or False 4720 explicitly_null_ordered = is_nulls_first or is_nulls_last 4721 4722 if ( 4723 not explicitly_null_ordered 4724 and ( 4725 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4726 or (desc and 
self.dialect.NULL_ORDERING != "nulls_are_small") 4727 ) 4728 and self.dialect.NULL_ORDERING != "nulls_are_last" 4729 ): 4730 nulls_first = True 4731 4732 if self._match_text_seq("WITH", "FILL"): 4733 with_fill = self.expression( 4734 exp.WithFill, 4735 **{ # type: ignore 4736 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4737 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4738 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4739 "interpolate": self._parse_interpolate(), 4740 }, 4741 ) 4742 else: 4743 with_fill = None 4744 4745 return self.expression( 4746 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4747 ) 4748 4749 def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]: 4750 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 4751 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4752 self._match_text_seq("ONLY") 4753 with_ties = self._match_text_seq("WITH", "TIES") 4754 4755 if not (percent or rows or with_ties): 4756 return None 4757 4758 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4759 4760 def _parse_limit( 4761 self, 4762 this: t.Optional[exp.Expression] = None, 4763 top: bool = False, 4764 skip_limit_token: bool = False, 4765 ) -> t.Optional[exp.Expression]: 4766 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4767 comments = self._prev_comments 4768 if top: 4769 limit_paren = self._match(TokenType.L_PAREN) 4770 expression = self._parse_term() if limit_paren else self._parse_number() 4771 4772 if limit_paren: 4773 self._match_r_paren() 4774 4775 else: 4776 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 4777 # we try to build an exp.Mod expr. 
To work around this, we backtrack and instead 4778 consume the factor plus parse the percentage separately 4779 expression = self._try_parse(self._parse_term) or self._parse_factor() 4780 4781 limit_options = self._parse_limit_options() 4782 4783 if self._match(TokenType.COMMA): 4784 offset = expression 4785 expression = self._parse_term() 4786 else: 4787 offset = None 4788 4789 limit_exp = self.expression( 4790 exp.Limit, 4791 this=this, 4792 expression=expression, 4793 offset=offset, 4794 comments=comments, 4795 limit_options=limit_options, 4796 expressions=self._parse_limit_by(), 4797 ) 4798 4799 return limit_exp 4800 4801 if self._match(TokenType.FETCH): 4802 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4803 direction = self._prev.text.upper() if direction else "FIRST" 4804 4805 count = self._parse_field(tokens=self.FETCH_TOKENS) 4806 4807 return self.expression( 4808 exp.Fetch, 4809 direction=direction, 4810 count=count, 4811 limit_options=self._parse_limit_options(), 4812 ) 4813 4814 return this 4815 4816 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4817 if not self._match(TokenType.OFFSET): 4818 return this 4819 4820 count = self._parse_term() 4821 self._match_set((TokenType.ROW, TokenType.ROWS)) 4822 4823 return self.expression( 4824 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4825 ) 4826 4827 def _can_parse_limit_or_offset(self) -> bool: 4828 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4829 return False 4830 4831 index = self._index 4832 result = bool( 4833 self._try_parse(self._parse_limit, retreat=True) 4834 or self._try_parse(self._parse_offset, retreat=True) 4835 ) 4836 self._retreat(index) 4837 return result 4838 4839 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4840 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4841 4842 def _parse_locks(self) -> t.List[exp.Lock]: 4843 locks = [] 4844 while True: 4845 update, key = None, None 4846 if self._match_text_seq("FOR", "UPDATE"): 4847 update = True 4848 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4849 "LOCK", "IN", "SHARE", "MODE" 4850 ): 4851 update = False 4852 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4853 update, key = False, True 4854 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4855 update, key = True, True 4856 else: 4857 break 4858 4859 expressions = None 4860 if self._match_text_seq("OF"): 4861 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4862 4863 wait: t.Optional[bool | exp.Expression] = None 4864 if self._match_text_seq("NOWAIT"): 4865 wait = True 4866 elif self._match_text_seq("WAIT"): 4867 wait = self._parse_primary() 4868 elif self._match_text_seq("SKIP", "LOCKED"): 4869 wait = False 4870 4871 locks.append( 4872 self.expression( 4873 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4874 ) 4875 ) 4876 4877 return locks 4878 4879 def parse_set_operation( 4880 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4881 ) -> t.Optional[exp.Expression]: 4882 start = self._index 4883 _, side_token, kind_token = self._parse_join_parts() 4884 4885 side = side_token.text if side_token else None 4886 kind = kind_token.text if kind_token else None 4887 4888 if not self._match_set(self.SET_OPERATIONS): 4889 self._retreat(start) 4890 return None 4891 4892 token_type = self._prev.token_type 4893 4894 if token_type == TokenType.UNION: 4895 operation: 
t.Type[exp.SetOperation] = exp.Union 4896 elif token_type == TokenType.EXCEPT: 4897 operation = exp.Except 4898 else: 4899 operation = exp.Intersect 4900 4901 comments = self._prev.comments 4902 4903 if self._match(TokenType.DISTINCT): 4904 distinct: t.Optional[bool] = True 4905 elif self._match(TokenType.ALL): 4906 distinct = False 4907 else: 4908 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4909 if distinct is None: 4910 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4911 4912 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4913 "STRICT", "CORRESPONDING" 4914 ) 4915 if self._match_text_seq("CORRESPONDING"): 4916 by_name = True 4917 if not side and not kind: 4918 kind = "INNER" 4919 4920 on_column_list = None 4921 if by_name and self._match_texts(("ON", "BY")): 4922 on_column_list = self._parse_wrapped_csv(self._parse_column) 4923 4924 expression = self._parse_select( 4925 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4926 ) 4927 4928 return self.expression( 4929 operation, 4930 comments=comments, 4931 this=this, 4932 distinct=distinct, 4933 by_name=by_name, 4934 expression=expression, 4935 side=side, 4936 kind=kind, 4937 on=on_column_list, 4938 ) 4939 4940 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4941 while this: 4942 setop = self.parse_set_operation(this) 4943 if not setop: 4944 break 4945 this = setop 4946 4947 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4948 expression = this.expression 4949 4950 if expression: 4951 for arg in self.SET_OP_MODIFIERS: 4952 expr = expression.args.get(arg) 4953 if expr: 4954 this.set(arg, expr.pop()) 4955 4956 return this 4957 4958 def _parse_expression(self) -> t.Optional[exp.Expression]: 4959 return self._parse_alias(self._parse_assignment()) 4960 4961 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4962 this = self._parse_disjunction() 4963 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4964 # This allows us to parse <non-identifier token> := <expr> 4965 this = exp.column( 4966 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4967 ) 4968 4969 while self._match_set(self.ASSIGNMENT): 4970 if isinstance(this, exp.Column) and len(this.parts) == 1: 4971 this = this.this 4972 4973 this = self.expression( 4974 self.ASSIGNMENT[self._prev.token_type], 4975 this=this, 4976 comments=self._prev_comments, 4977 expression=self._parse_assignment(), 4978 ) 4979 4980 return this 4981 4982 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4983 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4984 4985 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4986 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4987 4988 def _parse_equality(self) -> t.Optional[exp.Expression]: 4989 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4990 4991 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4992 return self._parse_tokens(self._parse_range, self.COMPARISON) 4993 4994 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4995 this = this or self._parse_bitwise() 4996 negate = self._match(TokenType.NOT) 4997 4998 if self._match_set(self.RANGE_PARSERS): 4999 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5000 if not expression: 5001 return this 5002 5003 this = expression 5004 elif self._match(TokenType.ISNULL): 
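            # Illustrative sketch of the branch below: ISNULL is the Postgres-style
            # shorthand `expr ISNULL`, normalized here to `expr IS NULL`. Assuming
            # sqlglot's public API (exact output may vary by version):
            #
            #   >>> import sqlglot
            #   >>> sqlglot.transpile("SELECT a ISNULL", read="postgres")[0]
            #   'SELECT a IS NULL'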
5005 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5006 5007 # Postgres supports ISNULL and NOTNULL for conditions. 5008 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5009 if self._match(TokenType.NOTNULL): 5010 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5011 this = self.expression(exp.Not, this=this) 5012 5013 if negate: 5014 this = self._negate_range(this) 5015 5016 if self._match(TokenType.IS): 5017 this = self._parse_is(this) 5018 5019 return this 5020 5021 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5022 if not this: 5023 return this 5024 5025 return self.expression(exp.Not, this=this) 5026 5027 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5028 index = self._index - 1 5029 negate = self._match(TokenType.NOT) 5030 5031 if self._match_text_seq("DISTINCT", "FROM"): 5032 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5033 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5034 5035 if self._match(TokenType.JSON): 5036 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5037 5038 if self._match_text_seq("WITH"): 5039 _with = True 5040 elif self._match_text_seq("WITHOUT"): 5041 _with = False 5042 else: 5043 _with = None 5044 5045 unique = self._match(TokenType.UNIQUE) 5046 self._match_text_seq("KEYS") 5047 expression: t.Optional[exp.Expression] = self.expression( 5048 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5049 ) 5050 else: 5051 expression = self._parse_primary() or self._parse_null() 5052 if not expression: 5053 self._retreat(index) 5054 return None 5055 5056 this = self.expression(exp.Is, this=this, expression=expression) 5057 return self.expression(exp.Not, this=this) if negate else this 5058 5059 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5060 unnest = self._parse_unnest(with_alias=False) 5061 if unnest: 5062 this = self.expression(exp.In, this=this, unnest=unnest) 5063 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5064 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5065 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5066 5067 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5068 this = self.expression( 5069 exp.In, 5070 this=this, 5071 query=self._parse_query_modifiers(query).subquery(copy=False), 5072 ) 5073 else: 5074 this = self.expression(exp.In, this=this, expressions=expressions) 5075 5076 if matched_l_paren: 5077 self._match_r_paren(this) 5078 elif not self._match(TokenType.R_BRACKET, expression=this): 5079 self.raise_error("Expecting ]") 5080 else: 5081 this = self.expression(exp.In, this=this, field=self._parse_column()) 5082 5083 return this 5084 5085 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5086 symmetric = None 5087 if self._match_text_seq("SYMMETRIC"): 5088 symmetric = True 5089 elif self._match_text_seq("ASYMMETRIC"): 5090 symmetric = False 5091 5092 low = self._parse_bitwise() 5093 self._match(TokenType.AND) 5094 high = self._parse_bitwise() 5095 5096 return self.expression( 5097 exp.Between, 5098 this=this, 5099 low=low, 5100 high=high, 5101 symmetric=symmetric, 5102 ) 5103 5104 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5105 if not self._match(TokenType.ESCAPE): 5106 return this 5107 return self.expression(exp.Escape, 
this=this, expression=self._parse_string()) 5108 5109 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5110 index = self._index 5111 5112 if not self._match(TokenType.INTERVAL) and match_interval: 5113 return None 5114 5115 if self._match(TokenType.STRING, advance=False): 5116 this = self._parse_primary() 5117 else: 5118 this = self._parse_term() 5119 5120 if not this or ( 5121 isinstance(this, exp.Column) 5122 and not this.table 5123 and not this.this.quoted 5124 and self._curr 5125 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5126 ): 5127 self._retreat(index) 5128 return None 5129 5130 # handle day-time format interval span with omitted units: 5131 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5132 interval_span_units_omitted = None 5133 if ( 5134 this 5135 and this.is_string 5136 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5137 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5138 ): 5139 index = self._index 5140 5141 # Var "TO" Var 5142 first_unit = self._parse_var(any_token=True, upper=True) 5143 second_unit = None 5144 if first_unit and self._match_text_seq("TO"): 5145 second_unit = self._parse_var(any_token=True, upper=True) 5146 5147 interval_span_units_omitted = not (first_unit and second_unit) 5148 5149 self._retreat(index) 5150 5151 unit = ( 5152 None 5153 if interval_span_units_omitted 5154 else ( 5155 self._parse_function() 5156 or ( 5157 not self._match(TokenType.ALIAS, advance=False) 5158 and self._parse_var(any_token=True, upper=True) 5159 ) 5160 ) 5161 ) 5162 5163 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5164 # each INTERVAL expression into this canonical form so it's easy to transpile 5165 if this and this.is_number: 5166 this = exp.Literal.string(this.to_py()) 5167 elif this and this.is_string: 5168 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5169 if parts and unit: 5170 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5171 unit = None 5172 self._retreat(self._index - 1) 5173 5174 if len(parts) == 1: 5175 this = exp.Literal.string(parts[0][0]) 5176 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5177 5178 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5179 unit = self.expression( 5180 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5181 ) 5182 5183 interval = self.expression(exp.Interval, this=this, unit=unit) 5184 5185 index = self._index 5186 self._match(TokenType.PLUS) 5187 5188 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5189 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5190 return self.expression( 5191 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5192 ) 5193 5194 self._retreat(index) 5195 return interval 5196 5197 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5198 this = self._parse_term() 5199 5200 while True: 5201 if self._match_set(self.BITWISE): 5202 this = self.expression( 5203 self.BITWISE[self._prev.token_type], 5204 this=this, 5205 expression=self._parse_term(), 5206 ) 5207 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5208 this = self.expression( 5209 exp.DPipe, 5210 this=this, 5211 expression=self._parse_term(), 5212 safe=not self.dialect.STRICT_STRING_CONCAT, 5213 ) 5214 elif self._match(TokenType.DQMARK): 5215 this = self.expression( 5216 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5217 ) 5218 elif self._match_pair(TokenType.LT, TokenType.LT): 5219 this = self.expression( 5220 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5221 ) 5222 elif self._match_pair(TokenType.GT, TokenType.GT): 5223 this = self.expression( 5224 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5225 ) 5226 else: 5227 break 5228 5229 return this 5230 5231 def _parse_term(self) -> t.Optional[exp.Expression]: 5232 this = self._parse_factor() 5233 5234 while self._match_set(self.TERM): 5235 klass = self.TERM[self._prev.token_type] 5236 comments = self._prev_comments 5237 expression = self._parse_factor() 5238 5239 this = self.expression(klass, this=this, comments=comments, expression=expression) 5240 5241 if isinstance(this, exp.Collate): 5242 expr = this.expression 5243 5244 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5245 # fallback to Identifier / Var 5246 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5247 ident = expr.this 5248 if isinstance(ident, exp.Identifier): 5249 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5250 5251 return this 5252 5253 def _parse_factor(self) -> t.Optional[exp.Expression]: 5254 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5255 this = parse_method() 5256 5257 while self._match_set(self.FACTOR): 5258 klass = self.FACTOR[self._prev.token_type] 5259 comments = self._prev_comments 5260 expression = parse_method() 5261 5262 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5263 self._retreat(self._index - 1) 5264 return this 5265 5266 this = self.expression(klass, this=this, comments=comments, expression=expression) 5267 5268 if isinstance(this, exp.Div): 5269 this.args["typed"] = self.dialect.TYPED_DIVISION 5270 this.args["safe"] = self.dialect.SAFE_DIVISION 5271 5272 return this 5273 5274 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5275 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5276 5277 def _parse_unary(self) -> t.Optional[exp.Expression]: 5278 if self._match_set(self.UNARY_PARSERS): 5279 return self.UNARY_PARSERS[self._prev.token_type](self) 5280 return self._parse_at_time_zone(self._parse_type()) 5281 5282 def _parse_type( 5283 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5284 ) -> t.Optional[exp.Expression]: 5285 interval = parse_interval and self._parse_interval() 5286 if interval: 5287 return interval 5288 5289 index = self._index 5290 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5291 
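        # Illustrative example of the inline-constructor case handled just below,
        # assuming sqlglot's public API (BigQuery syntax; AST details may vary
        # across versions):
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> node = sqlglot.parse_one("SELECT STRUCT<a INT64>(1)", read="bigquery")
        #   >>> isinstance(node.selects[0], exp.Cast)
        #   True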
5292 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5293 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5294 if isinstance(data_type, exp.Cast): 5295 # This constructor can contain ops directly after it, for instance struct unnesting: 5296 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5297 return self._parse_column_ops(data_type) 5298 5299 if data_type: 5300 index2 = self._index 5301 this = self._parse_primary() 5302 5303 if isinstance(this, exp.Literal): 5304 literal = this.name 5305 this = self._parse_column_ops(this) 5306 5307 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5308 if parser: 5309 return parser(self, this, data_type) 5310 5311 if ( 5312 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5313 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5314 and TIME_ZONE_RE.search(literal) 5315 ): 5316 data_type = exp.DataType.build("TIMESTAMPTZ") 5317 5318 return self.expression(exp.Cast, this=this, to=data_type) 5319 5320 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5321 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5322 # 5323 # If the index difference here is greater than 1, that means the parser itself must have 5324 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5325 # 5326 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5327 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5328 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5329 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5330 # 5331 # In these cases, we don't really want to return the converted type, but instead retreat 5332 # and try to parse a Column or Identifier in the section below.
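            # Worked example of the index arithmetic above: for DECIMAL(38, 0) the parser
            # itself consumes the tokens DECIMAL ( 38 , 0 ), so index2 - index == 6 and the
            # branch below returns the parsed type; for a bare DECIMAL expanded by a
            # TYPE_CONVERTERS callable, only the type keyword is consumed
            # (index2 - index == 1), so we fall through, retreat, and re-parse the token
            # as a Column / Identifier.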
5333 if data_type.expressions and index2 - index > 1: 5334 self._retreat(index2) 5335 return self._parse_column_ops(data_type) 5336 5337 self._retreat(index) 5338 5339 if fallback_to_identifier: 5340 return self._parse_id_var() 5341 5342 this = self._parse_column() 5343 return this and self._parse_column_ops(this) 5344 5345 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5346 this = self._parse_type() 5347 if not this: 5348 return None 5349 5350 if isinstance(this, exp.Column) and not this.table: 5351 this = exp.var(this.name.upper()) 5352 5353 return self.expression( 5354 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5355 ) 5356 5357 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5358 type_name = identifier.name 5359 5360 while self._match(TokenType.DOT): 5361 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5362 5363 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5364 5365 def _parse_types( 5366 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5367 ) -> t.Optional[exp.Expression]: 5368 index = self._index 5369 5370 this: t.Optional[exp.Expression] = None 5371 prefix = self._match_text_seq("SYSUDTLIB", ".") 5372 5373 if self._match_set(self.TYPE_TOKENS): 5374 type_token = self._prev.token_type 5375 else: 5376 type_token = None 5377 identifier = allow_identifiers and self._parse_id_var( 5378 any_token=False, tokens=(TokenType.VAR,) 5379 ) 5380 if isinstance(identifier, exp.Identifier): 5381 try: 5382 tokens = self.dialect.tokenize(identifier.name) 5383 except TokenError: 5384 tokens = None 5385 5386 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5387 type_token = tokens[0].token_type 5388 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5389 this = self._parse_user_defined_type(identifier) 5390 else: 5391 self._retreat(self._index - 1) 5392 return None 5393 else: 5394 return None 5395 5396 if type_token == TokenType.PSEUDO_TYPE: 5397 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5398 5399 if type_token == TokenType.OBJECT_IDENTIFIER: 5400 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5401 5402 # https://materialize.com/docs/sql/types/map/ 5403 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5404 key_type = self._parse_types( 5405 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5406 ) 5407 if not self._match(TokenType.FARROW): 5408 self._retreat(index) 5409 return None 5410 5411 value_type = self._parse_types( 5412 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5413 ) 5414 if not self._match(TokenType.R_BRACKET): 5415 self._retreat(index) 5416 return None 5417 5418 return exp.DataType( 5419 this=exp.DataType.Type.MAP, 5420 expressions=[key_type, value_type], 5421 nested=True, 5422 prefix=prefix, 5423 ) 5424 5425 nested = type_token in self.NESTED_TYPE_TOKENS 5426 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5427 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5428 expressions = None 5429 maybe_func = False 5430 5431 if self._match(TokenType.L_PAREN): 5432 if is_struct: 5433 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5434 elif nested: 5435 expressions = self._parse_csv( 5436 lambda: self._parse_types( 5437 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5438 ) 5439 ) 5440 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5441 this = expressions[0] 5442 this.set("nullable", True) 5443 self._match_r_paren() 5444 return this 5445 elif type_token in self.ENUM_TYPE_TOKENS: 5446 expressions = self._parse_csv(self._parse_equality) 5447 elif is_aggregate: 5448 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5449 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5450 ) 5451 if not func_or_ident: 5452 return None 5453 expressions = [func_or_ident] 5454 if self._match(TokenType.COMMA): 5455 expressions.extend( 5456 self._parse_csv( 5457 lambda: self._parse_types( 5458 check_func=check_func, 5459 schema=schema, 5460 allow_identifiers=allow_identifiers, 5461 ) 5462 ) 5463 ) 5464 else: 5465 expressions = self._parse_csv(self._parse_type_size) 5466 5467 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5468 if type_token == TokenType.VECTOR and len(expressions) == 2: 5469 expressions = self._parse_vector_expressions(expressions) 5470 5471 if not self._match(TokenType.R_PAREN): 5472 self._retreat(index) 5473 return None 5474 5475 maybe_func = True 5476 5477 values: t.Optional[t.List[exp.Expression]] = None 5478 5479 if nested and self._match(TokenType.LT): 5480 if is_struct: 5481 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5482 else: 5483 expressions = self._parse_csv( 5484 lambda: self._parse_types( 5485 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5486 ) 5487 ) 5488 5489 if not self._match(TokenType.GT): 5490 self.raise_error("Expecting >") 5491 5492 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5493 values = self._parse_csv(self._parse_assignment) 5494 if not values and is_struct: 5495 values = None 5496 self._retreat(self._index - 1) 5497 else: 5498 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5499 5500 if type_token in self.TIMESTAMPS: 5501 if self._match_text_seq("WITH", "TIME", "ZONE"): 5502 maybe_func = False 5503 tz_type = ( 5504 exp.DataType.Type.TIMETZ 5505 if type_token in self.TIMES 5506 else exp.DataType.Type.TIMESTAMPTZ 5507 ) 5508 this = exp.DataType(this=tz_type, expressions=expressions) 5509 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5510 maybe_func = False 5511 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5512 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5513 maybe_func = False 5514 elif type_token == TokenType.INTERVAL: 5515 unit = self._parse_var(upper=True) 5516 if unit: 5517 if self._match_text_seq("TO"): 5518 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5519 5520 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5521 else: 5522 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5523 elif type_token == TokenType.VOID: 5524 this = exp.DataType(this=exp.DataType.Type.NULL) 5525 5526 if maybe_func and check_func: 5527 index2 = self._index 5528 peek = self._parse_string() 5529 5530 if not peek: 5531 self._retreat(index) 5532 return None 5533 5534 self._retreat(index2) 5535 5536 if not this: 5537 if self._match_text_seq("UNSIGNED"): 5538 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5539 if not unsigned_type_token: 5540 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5541 5542 type_token = unsigned_type_token or type_token 5543 5544 # NULLABLE without parentheses can be a column (Presto/Trino) 5545 if type_token == 
TokenType.NULLABLE and not expressions: 5546 self._retreat(index) 5547 return None 5548 5549 this = exp.DataType( 5550 this=exp.DataType.Type[type_token.value], 5551 expressions=expressions, 5552 nested=nested, 5553 prefix=prefix, 5554 ) 5555 5556 # Empty arrays/structs are allowed 5557 if values is not None: 5558 cls = exp.Struct if is_struct else exp.Array 5559 this = exp.cast(cls(expressions=values), this, copy=False) 5560 5561 elif expressions: 5562 this.set("expressions", expressions) 5563 5564 # https://materialize.com/docs/sql/types/list/#type-name 5565 while self._match(TokenType.LIST): 5566 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5567 5568 index = self._index 5569 5570 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5571 matched_array = self._match(TokenType.ARRAY) 5572 5573 while self._curr: 5574 datatype_token = self._prev.token_type 5575 matched_l_bracket = self._match(TokenType.L_BRACKET) 5576 5577 if (not matched_l_bracket and not matched_array) or ( 5578 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5579 ): 5580 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5581 # not to be confused with the fixed size array parsing 5582 break 5583 5584 matched_array = False 5585 values = self._parse_csv(self._parse_assignment) or None 5586 if ( 5587 values 5588 and not schema 5589 and ( 5590 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5591 ) 5592 ): 5593 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5594 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5595 self._retreat(index) 5596 break 5597 5598 this = exp.DataType( 5599 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5600 ) 5601 self._match(TokenType.R_BRACKET) 5602 5603 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5604 converter = self.TYPE_CONVERTERS.get(this.this) 5605 if converter: 5606 this = converter(t.cast(exp.DataType, this)) 5607 5608 return this 5609 5610 def _parse_vector_expressions( 5611 self, expressions: t.List[exp.Expression] 5612 ) -> t.List[exp.Expression]: 5613 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5614 5615 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5616 index = self._index 5617 5618 if ( 5619 self._curr 5620 and self._next 5621 and self._curr.token_type in self.TYPE_TOKENS 5622 and self._next.token_type in self.TYPE_TOKENS 5623 ): 5624 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5625 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5626 this = self._parse_id_var() 5627 else: 5628 this = ( 5629 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5630 or self._parse_id_var() 5631 ) 5632 5633 self._match(TokenType.COLON) 5634 5635 if ( 5636 type_required 5637 and not isinstance(this, exp.DataType) 5638 and not self._match_set(self.TYPE_TOKENS, advance=False) 5639 ): 5640 self._retreat(index) 5641 return self._parse_types() 5642 5643 return self._parse_column_def(this) 5644 5645 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5646 if not self._match_text_seq("AT", "TIME", "ZONE"): 5647 return this 5648 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5649 5650 def _parse_column(self) -> t.Optional[exp.Expression]: 5651 this = self._parse_column_reference() 5652 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5653 5654 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5655 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5656 5657 return column 5658 5659 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5660 this = self._parse_field() 5661 if ( 5662 not this 5663 and self._match(TokenType.VALUES, advance=False) 5664 and self.VALUES_FOLLOWED_BY_PAREN 5665 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5666 ): 5667 this = self._parse_id_var() 5668 5669 if isinstance(this, exp.Identifier): 5670 # We bubble up comments from the Identifier to the Column 5671 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5672 5673 return this 5674 5675 def _parse_colon_as_variant_extract( 5676 self, this: t.Optional[exp.Expression] 5677 ) -> t.Optional[exp.Expression]: 5678 casts = [] 5679 json_path = [] 5680 escape = None 5681 5682 while self._match(TokenType.COLON): 5683 start_index = self._index 5684 5685 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5686 path = self._parse_column_ops( 5687 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5688 ) 5689 5690 # The cast :: operator has a lower precedence than the extraction operator :, so 5691 # we rearrange the AST appropriately to avoid casting the JSON path 5692 while isinstance(path, exp.Cast): 5693 casts.append(path.to) 5694 path = path.this 5695 5696 if casts: 5697 dcolon_offset = next( 5698 i 5699 for i, t in enumerate(self._tokens[start_index:]) 5700 if t.token_type == TokenType.DCOLON 5701 ) 5702 end_token = self._tokens[start_index + dcolon_offset - 1] 5703 else: 5704 end_token = self._prev 5705 5706 if path: 5707 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5708 # it'll roundtrip to a string literal in GET_PATH 5709 if isinstance(path, exp.Identifier) and path.quoted: 5710 escape = True 5711 5712 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5713 5714 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5715 # Databricks transforms it back to the colon/dot notation 5716 if json_path: 5717 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5718 5719 if json_path_expr: 5720 json_path_expr.set("escape", escape) 5721 5722 this = self.expression( 5723 exp.JSONExtract, 5724 this=this, 5725 expression=json_path_expr, 5726 variant_extract=True, 5727 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5728 ) 5729 5730 while casts: 5731 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5732 5733 return this 5734 5735 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5736 return self._parse_types() 5737 5738 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5739 this = self._parse_bracket(this) 5740 5741 while self._match_set(self.COLUMN_OPERATORS): 5742 op_token = self._prev.token_type 5743 op = self.COLUMN_OPERATORS.get(op_token) 5744 5745 if op_token in self.CAST_COLUMN_OPERATORS: 5746 field = self._parse_dcolon() 5747 if not field: 5748 self.raise_error("Expected type") 5749 elif op and self._curr: 5750 field = self._parse_column_reference() or self._parse_bitwise() 5751 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5752 field = self._parse_column_ops(field) 5753 else: 5754 field = self._parse_field(any_token=True, anonymous_func=True) 5755 5756 # Function calls can be qualified, e.g., x.y.FOO() 5757 # This converts the final AST to a series of Dots leading to the function call 5758 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5759 if isinstance(field, (exp.Func, exp.Window)) and this: 5760 this = this.transform( 5761 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5762 ) 5763 5764 if op: 5765 this = op(self, this, field) 5766 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5767 this = self.expression( 5768 exp.Column, 5769 comments=this.comments, 5770 this=field, 5771 table=this.this, 5772 db=this.args.get("table"), 5773 catalog=this.args.get("db"), 5774 ) 5775 elif isinstance(field, exp.Window): 5776 # Move the exp.Dot's to the window's function 5777 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5778 field.set("this", window_func) 5779 this = field 5780 else: 5781 this = self.expression(exp.Dot, this=this, expression=field) 5782 5783 if field and field.comments: 5784 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5785 5786 this = self._parse_bracket(this) 5787 5788 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5789 5790 def _parse_paren(self) -> t.Optional[exp.Expression]: 5791 if not self._match(TokenType.L_PAREN): 5792 return None 5793 5794 comments = self._prev_comments 5795 query = self._parse_select() 5796 5797 if query: 5798 expressions = [query] 5799 else: 5800 expressions = self._parse_expressions() 5801 5802 this = seq_get(expressions, 0) 5803 5804 if not this and self._match(TokenType.R_PAREN, advance=False): 5805 this = self.expression(exp.Tuple) 5806 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5807 this = 
self._parse_subquery(this=this, parse_alias=False) 5808 elif isinstance(this, exp.Subquery): 5809 this = self._parse_subquery( 5810 this=self._parse_query_modifiers(self._parse_set_operations(this)), 5811 parse_alias=False, 5812 ) 5813 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5814 this = self.expression(exp.Tuple, expressions=expressions) 5815 else: 5816 this = self.expression(exp.Paren, this=this) 5817 5818 if this: 5819 this.add_comments(comments) 5820 5821 self._match_r_paren(expression=this) 5822 5823 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5824 return self._parse_window(this) 5825 5826 return this 5827 5828 def _parse_primary(self) -> t.Optional[exp.Expression]: 5829 if self._match_set(self.PRIMARY_PARSERS): 5830 token_type = self._prev.token_type 5831 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5832 5833 if token_type == TokenType.STRING: 5834 expressions = [primary] 5835 while self._match(TokenType.STRING): 5836 expressions.append(exp.Literal.string(self._prev.text)) 5837 5838 if len(expressions) > 1: 5839 return self.expression(exp.Concat, expressions=expressions) 5840 5841 return primary 5842 5843 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5844 return exp.Literal.number(f"0.{self._prev.text}") 5845 5846 return self._parse_paren() 5847 5848 def _parse_field( 5849 self, 5850 any_token: bool = False, 5851 tokens: t.Optional[t.Collection[TokenType]] = None, 5852 anonymous_func: bool = False, 5853 ) -> t.Optional[exp.Expression]: 5854 if anonymous_func: 5855 field = ( 5856 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5857 or self._parse_primary() 5858 ) 5859 else: 5860 field = self._parse_primary() or self._parse_function( 5861 anonymous=anonymous_func, any_token=any_token 5862 ) 5863 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5864 5865 def _parse_function( 5866 self, 5867 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5868 anonymous: bool = False, 5869 optional_parens: bool = True, 5870 any_token: bool = False, 5871 ) -> t.Optional[exp.Expression]: 5872 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5873 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5874 fn_syntax = False 5875 if ( 5876 self._match(TokenType.L_BRACE, advance=False) 5877 and self._next 5878 and self._next.text.upper() == "FN" 5879 ): 5880 self._advance(2) 5881 fn_syntax = True 5882 5883 func = self._parse_function_call( 5884 functions=functions, 5885 anonymous=anonymous, 5886 optional_parens=optional_parens, 5887 any_token=any_token, 5888 ) 5889 5890 if fn_syntax: 5891 self._match(TokenType.R_BRACE) 5892 5893 return func 5894 5895 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5896 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5897 5898 def _parse_function_call( 5899 self, 5900 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5901 anonymous: bool = False, 5902 optional_parens: bool = True, 5903 any_token: bool = False, 5904 ) -> t.Optional[exp.Expression]: 5905 if not self._curr: 5906 return None 5907 5908 comments = self._curr.comments 5909 prev = self._prev 5910 token = self._curr 5911 token_type = self._curr.token_type 5912 this = self._curr.text 5913 upper = this.upper() 5914 5915 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5916 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5917 self._advance() 5918 return 
self._parse_window(parser(self)) 5919 5920 if not self._next or self._next.token_type != TokenType.L_PAREN: 5921 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5922 self._advance() 5923 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5924 5925 return None 5926 5927 if any_token: 5928 if token_type in self.RESERVED_TOKENS: 5929 return None 5930 elif token_type not in self.FUNC_TOKENS: 5931 return None 5932 5933 self._advance(2) 5934 5935 parser = self.FUNCTION_PARSERS.get(upper) 5936 if parser and not anonymous: 5937 this = parser(self) 5938 else: 5939 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5940 5941 if subquery_predicate: 5942 expr = None 5943 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5944 expr = self._parse_select() 5945 self._match_r_paren() 5946 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5947 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5948 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5949 self._advance(-1) 5950 expr = self._parse_bitwise() 5951 5952 if expr: 5953 return self.expression(subquery_predicate, comments=comments, this=expr) 5954 5955 if functions is None: 5956 functions = self.FUNCTIONS 5957 5958 function = functions.get(upper) 5959 known_function = function and not anonymous 5960 5961 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5962 args = self._parse_function_args(alias) 5963 5964 post_func_comments = self._curr and self._curr.comments 5965 if known_function and post_func_comments: 5966 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5967 # call we'll construct it as exp.Anonymous, even if it's "known" 5968 if any( 5969 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5970 for comment in post_func_comments 5971 ): 5972 known_function = False 5973 5974 if alias and known_function: 5975 args = self._kv_to_prop_eq(args) 5976 5977 if known_function: 5978 func_builder = t.cast(t.Callable, function) 5979 5980 if "dialect" in func_builder.__code__.co_varnames: 5981 func = func_builder(args, dialect=self.dialect) 5982 else: 5983 func = func_builder(args) 5984 5985 func = self.validate_expression(func, args) 5986 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5987 func.meta["name"] = this 5988 5989 this = func 5990 else: 5991 if token_type == TokenType.IDENTIFIER: 5992 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5993 5994 this = self.expression(exp.Anonymous, this=this, expressions=args) 5995 this = this.update_positions(token) 5996 5997 if isinstance(this, exp.Expression): 5998 this.add_comments(comments) 5999 6000 self._match_r_paren(this) 6001 return self._parse_window(this) 6002 6003 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 6004 return expression 6005 6006 def _kv_to_prop_eq( 6007 self, expressions: t.List[exp.Expression], parse_map: bool = False 6008 ) -> t.List[exp.Expression]: 6009 transformed = [] 6010 6011 for index, e in enumerate(expressions): 6012 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6013 if isinstance(e, exp.Alias): 6014 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6015 6016 if not isinstance(e, exp.PropertyEQ): 6017 e = self.expression( 6018 exp.PropertyEQ, 6019 this=e.this if parse_map else exp.to_identifier(e.this.name), 6020 expression=e.expression, 6021 ) 6022 6023 if isinstance(e.this, exp.Column): 6024 e.this.replace(e.this.this) 
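            # Arguments that aren't key-value definitions go through _to_prop_eq (defined
            # above), which is a no-op by default and exists as a hook for dialect-specific
            # parsers to override.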
6025 else: 6026 e = self._to_prop_eq(e, index) 6027 6028 transformed.append(e) 6029 6030 return transformed 6031 6032 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6033 return self._parse_statement() 6034 6035 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6036 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6037 6038 def _parse_user_defined_function( 6039 self, kind: t.Optional[TokenType] = None 6040 ) -> t.Optional[exp.Expression]: 6041 this = self._parse_table_parts(schema=True) 6042 6043 if not self._match(TokenType.L_PAREN): 6044 return this 6045 6046 expressions = self._parse_csv(self._parse_function_parameter) 6047 self._match_r_paren() 6048 return self.expression( 6049 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6050 ) 6051 6052 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6053 literal = self._parse_primary() 6054 if literal: 6055 return self.expression(exp.Introducer, this=token.text, expression=literal) 6056 6057 return self._identifier_expression(token) 6058 6059 def _parse_session_parameter(self) -> exp.SessionParameter: 6060 kind = None 6061 this = self._parse_id_var() or self._parse_primary() 6062 6063 if this and self._match(TokenType.DOT): 6064 kind = this.name 6065 this = self._parse_var() or self._parse_primary() 6066 6067 return self.expression(exp.SessionParameter, this=this, kind=kind) 6068 6069 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6070 return self._parse_id_var() 6071 6072 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6073 index = self._index 6074 6075 if self._match(TokenType.L_PAREN): 6076 expressions = t.cast( 6077 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6078 ) 6079 6080 if not self._match(TokenType.R_PAREN): 6081 self._retreat(index) 6082 else: 6083 expressions = [self._parse_lambda_arg()] 6084 6085 if self._match_set(self.LAMBDAS): 6086 return self.LAMBDAS[self._prev.token_type](self, expressions) 6087 6088 self._retreat(index) 6089 6090 this: t.Optional[exp.Expression] 6091 6092 if self._match(TokenType.DISTINCT): 6093 this = self.expression( 6094 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6095 ) 6096 else: 6097 this = self._parse_select_or_expression(alias=alias) 6098 6099 return self._parse_limit( 6100 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6101 ) 6102 6103 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6104 index = self._index 6105 if not self._match(TokenType.L_PAREN): 6106 return this 6107 6108 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6109 # expr can be of both types 6110 if self._match_set(self.SELECT_START_TOKENS): 6111 self._retreat(index) 6112 return this 6113 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6114 self._match_r_paren() 6115 return self.expression(exp.Schema, this=this, expressions=args) 6116 6117 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6118 return self._parse_column_def(self._parse_field(any_token=True)) 6119 6120 def _parse_column_def( 6121 self, this: t.Optional[exp.Expression], computed_column: bool = True 6122 ) -> t.Optional[exp.Expression]: 6123 # column defs are not really columns, they're identifiers 6124 if isinstance(this, exp.Column): 6125 this = this.this 6126 6127 if not computed_column: 6128 self._match(TokenType.ALIAS) 6129 6130 kind = self._parse_types(schema=True) 6131 6132 if self._match_text_seq("FOR", "ORDINALITY"): 6133 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6134 6135 constraints: t.List[exp.Expression] = [] 6136 6137 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6138 ("ALIAS", "MATERIALIZED") 6139 ): 6140 persisted = self._prev.text.upper() == "MATERIALIZED" 6141 constraint_kind = exp.ComputedColumnConstraint( 6142 this=self._parse_assignment(), 6143 persisted=persisted or self._match_text_seq("PERSISTED"), 6144 data_type=exp.Var(this="AUTO") 6145 if self._match_text_seq("AUTO") 6146 else self._parse_types(), 6147 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6148 ) 6149 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6150 elif ( 6151 kind 6152 and self._match(TokenType.ALIAS, advance=False) 6153 and ( 6154 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6155 or (self._next and self._next.token_type == TokenType.L_PAREN) 6156 ) 6157 ): 6158 self._advance() 6159 constraints.append( 6160 self.expression( 6161 exp.ColumnConstraint, 6162 kind=exp.ComputedColumnConstraint( 6163 this=self._parse_disjunction(), 6164 persisted=self._match_texts(("STORED", "VIRTUAL")) 6165 and self._prev.text.upper() == "STORED", 6166 ), 6167 ) 6168 ) 6169 6170 while True: 6171 constraint = self._parse_column_constraint() 6172 if not constraint: 6173 break 6174 constraints.append(constraint) 6175 6176 if not kind and not constraints: 6177 return this 6178 6179 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6180 6181 def _parse_auto_increment( 6182 self, 6183 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6184 start = None 6185 increment = None 6186 order = None 6187 6188 if self._match(TokenType.L_PAREN, advance=False): 6189 args = self._parse_wrapped_csv(self._parse_bitwise) 6190 start = seq_get(args, 0) 6191 increment = seq_get(args, 1) 6192 elif self._match_text_seq("START"): 6193 start = self._parse_bitwise() 6194 self._match_text_seq("INCREMENT") 6195 increment = self._parse_bitwise() 6196 if self._match_text_seq("ORDER"): 6197 order = True 6198 elif self._match_text_seq("NOORDER"): 6199 order = False 6200 6201 if start and increment: 6202 return exp.GeneratedAsIdentityColumnConstraint( 6203 start=start, increment=increment, this=False, order=order 6204 ) 6205 6206 return exp.AutoIncrementColumnConstraint() 6207 6208 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6209 if not self._match_text_seq("REFRESH"): 6210 self._retreat(self._index - 1) 6211 return None 6212 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6213 6214 def _parse_compress(self) -> exp.CompressColumnConstraint: 6215 if self._match(TokenType.L_PAREN, advance=False): 6216 return self.expression( 6217 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6218 ) 6219 6220 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6221 6222 def _parse_generated_as_identity( 6223 self, 6224 ) -> ( 6225 exp.GeneratedAsIdentityColumnConstraint 6226 | exp.ComputedColumnConstraint 6227 | exp.GeneratedAsRowColumnConstraint 6228 ): 6229 if self._match_text_seq("BY", "DEFAULT"): 6230 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6231 this = self.expression( 6232 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6233 ) 6234 else: 6235 self._match_text_seq("ALWAYS") 6236 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6237 6238 self._match(TokenType.ALIAS) 6239 6240 if self._match_text_seq("ROW"): 6241 start = self._match_text_seq("START") 6242 if not start: 6243 self._match(TokenType.END) 6244 hidden = self._match_text_seq("HIDDEN") 6245 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6246 6247 identity = self._match_text_seq("IDENTITY") 6248 6249 if self._match(TokenType.L_PAREN): 6250 if self._match(TokenType.START_WITH): 6251 this.set("start", self._parse_bitwise()) 6252 if self._match_text_seq("INCREMENT", "BY"): 6253 this.set("increment", self._parse_bitwise()) 6254 if self._match_text_seq("MINVALUE"): 6255 this.set("minvalue", self._parse_bitwise()) 6256 if self._match_text_seq("MAXVALUE"): 6257 this.set("maxvalue", self._parse_bitwise()) 6258 6259 if self._match_text_seq("CYCLE"): 6260 this.set("cycle", True) 6261 elif self._match_text_seq("NO", "CYCLE"): 6262 this.set("cycle", False) 6263 6264 if not identity: 6265 this.set("expression", self._parse_range()) 6266 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6267 args = self._parse_csv(self._parse_bitwise) 6268 this.set("start", seq_get(args, 0)) 6269 this.set("increment", seq_get(args, 1)) 6270 6271 self._match_r_paren() 6272 6273 return this 6274 6275 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6276 self._match_text_seq("LENGTH") 6277 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6278 6279 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6280 if self._match_text_seq("NULL"): 6281 return self.expression(exp.NotNullColumnConstraint) 6282 if self._match_text_seq("CASESPECIFIC"): 6283 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6284 if self._match_text_seq("FOR", "REPLICATION"): 6285 return self.expression(exp.NotForReplicationColumnConstraint) 6286 6287 # Unconsume the `NOT` token 6288 self._retreat(self._index - 1) 6289 return None 6290 6291 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6292 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6293 6294 procedure_option_follows = ( 6295 self._match(TokenType.WITH, advance=False) 6296 and self._next 6297 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6298 ) 6299 6300 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6301 return self.expression( 6302 exp.ColumnConstraint, 6303 this=this, 6304 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6305 ) 6306 6307 return this 6308 6309 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6310 if not 
self._match(TokenType.CONSTRAINT): 6311 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6312 6313 return self.expression( 6314 exp.Constraint, 6315 this=self._parse_id_var(), 6316 expressions=self._parse_unnamed_constraints(), 6317 ) 6318 6319 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6320 constraints = [] 6321 while True: 6322 constraint = self._parse_unnamed_constraint() or self._parse_function() 6323 if not constraint: 6324 break 6325 constraints.append(constraint) 6326 6327 return constraints 6328 6329 def _parse_unnamed_constraint( 6330 self, constraints: t.Optional[t.Collection[str]] = None 6331 ) -> t.Optional[exp.Expression]: 6332 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6333 constraints or self.CONSTRAINT_PARSERS 6334 ): 6335 return None 6336 6337 constraint = self._prev.text.upper() 6338 if constraint not in self.CONSTRAINT_PARSERS: 6339 self.raise_error(f"No parser found for schema constraint {constraint}.") 6340 6341 return self.CONSTRAINT_PARSERS[constraint](self) 6342 6343 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6344 return self._parse_id_var(any_token=False) 6345 6346 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6347 self._match_texts(("KEY", "INDEX")) 6348 return self.expression( 6349 exp.UniqueColumnConstraint, 6350 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6351 this=self._parse_schema(self._parse_unique_key()), 6352 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6353 on_conflict=self._parse_on_conflict(), 6354 options=self._parse_key_constraint_options(), 6355 ) 6356 6357 def _parse_key_constraint_options(self) -> t.List[str]: 6358 options = [] 6359 while True: 6360 if not self._curr: 6361 break 6362 6363 if self._match(TokenType.ON): 6364 action = None 6365 on = self._advance_any() and self._prev.text 6366 6367 if self._match_text_seq("NO", "ACTION"): 6368 action = "NO ACTION" 6369 elif self._match_text_seq("CASCADE"): 6370 action = "CASCADE" 6371 elif self._match_text_seq("RESTRICT"): 6372 action = "RESTRICT" 6373 elif self._match_pair(TokenType.SET, TokenType.NULL): 6374 action = "SET NULL" 6375 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6376 action = "SET DEFAULT" 6377 else: 6378 self.raise_error("Invalid key constraint") 6379 6380 options.append(f"ON {on} {action}") 6381 else: 6382 var = self._parse_var_from_options( 6383 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6384 ) 6385 if not var: 6386 break 6387 options.append(var.name) 6388 6389 return options 6390 6391 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6392 if match and not self._match(TokenType.REFERENCES): 6393 return None 6394 6395 expressions = None 6396 this = self._parse_table(schema=True) 6397 options = self._parse_key_constraint_options() 6398 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6399 6400 def _parse_foreign_key(self) -> exp.ForeignKey: 6401 expressions = ( 6402 self._parse_wrapped_id_vars() 6403 if not self._match(TokenType.REFERENCES, advance=False) 6404 else None 6405 ) 6406 reference = self._parse_references() 6407 on_options = {} 6408 6409 while self._match(TokenType.ON): 6410 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6411 self.raise_error("Expected DELETE or UPDATE") 6412 6413 kind = self._prev.text.lower() 6414 6415 if self._match_text_seq("NO", "ACTION"): 6416 action = "NO ACTION" 6417 elif 
self._match(TokenType.SET): 6418 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6419 action = "SET " + self._prev.text.upper() 6420 else: 6421 self._advance() 6422 action = self._prev.text.upper() 6423 6424 on_options[kind] = action 6425 6426 return self.expression( 6427 exp.ForeignKey, 6428 expressions=expressions, 6429 reference=reference, 6430 options=self._parse_key_constraint_options(), 6431 **on_options, # type: ignore 6432 ) 6433 6434 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6435 return self._parse_ordered() or self._parse_field() 6436 6437 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6438 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6439 self._retreat(self._index - 1) 6440 return None 6441 6442 id_vars = self._parse_wrapped_id_vars() 6443 return self.expression( 6444 exp.PeriodForSystemTimeConstraint, 6445 this=seq_get(id_vars, 0), 6446 expression=seq_get(id_vars, 1), 6447 ) 6448 6449 def _parse_primary_key( 6450 self, wrapped_optional: bool = False, in_props: bool = False 6451 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6452 desc = ( 6453 self._match_set((TokenType.ASC, TokenType.DESC)) 6454 and self._prev.token_type == TokenType.DESC 6455 ) 6456 6457 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6458 return self.expression( 6459 exp.PrimaryKeyColumnConstraint, 6460 desc=desc, 6461 options=self._parse_key_constraint_options(), 6462 ) 6463 6464 expressions = self._parse_wrapped_csv( 6465 self._parse_primary_key_part, optional=wrapped_optional 6466 ) 6467 6468 return self.expression( 6469 exp.PrimaryKey, 6470 expressions=expressions, 6471 include=self._parse_index_params(), 6472 options=self._parse_key_constraint_options(), 6473 ) 6474 6475 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6476 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6477 6478 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6479 """ 6480 Parses a datetime column in ODBC format. We parse the column into the corresponding 6481 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6482 same as we did for `DATE('yyyy-mm-dd')`. 
6483 6484 Reference: 6485 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6486 """ 6487 self._match(TokenType.VAR) 6488 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6489 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6490 if not self._match(TokenType.R_BRACE): 6491 self.raise_error("Expected }") 6492 return expression 6493 6494 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6495 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6496 return this 6497 6498 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6499 map_token = seq_get(self._tokens, self._index - 2) 6500 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6501 else: 6502 parse_map = False 6503 6504 bracket_kind = self._prev.token_type 6505 if ( 6506 bracket_kind == TokenType.L_BRACE 6507 and self._curr 6508 and self._curr.token_type == TokenType.VAR 6509 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6510 ): 6511 return self._parse_odbc_datetime_literal() 6512 6513 expressions = self._parse_csv( 6514 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6515 ) 6516 6517 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6518 self.raise_error("Expected ]") 6519 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6520 self.raise_error("Expected }") 6521 6522 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6523 if bracket_kind == TokenType.L_BRACE: 6524 this = self.expression( 6525 exp.Struct, 6526 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6527 ) 6528 elif not this: 6529 this = build_array_constructor( 6530 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6531 ) 6532 else: 6533 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6534 if constructor_type: 6535 return build_array_constructor( 6536 constructor_type, 6537 args=expressions, 6538 bracket_kind=bracket_kind, 6539 dialect=self.dialect, 6540 ) 6541 6542 expressions = apply_index_offset( 6543 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6544 ) 6545 this = self.expression( 6546 exp.Bracket, 6547 this=this, 6548 expressions=expressions, 6549 comments=this.pop_comments(), 6550 ) 6551 6552 self._add_comments(this) 6553 return self._parse_bracket(this) 6554 6555 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6556 if self._match(TokenType.COLON): 6557 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6558 return this 6559 6560 def _parse_case(self) -> t.Optional[exp.Expression]: 6561 if self._match(TokenType.DOT, advance=False): 6562 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6563 self._retreat(self._index - 1) 6564 return None 6565 6566 ifs = [] 6567 default = None 6568 6569 comments = self._prev_comments 6570 expression = self._parse_assignment() 6571 6572 while self._match(TokenType.WHEN): 6573 this = self._parse_assignment() 6574 self._match(TokenType.THEN) 6575 then = self._parse_assignment() 6576 ifs.append(self.expression(exp.If, this=this, true=then)) 6577 6578 if self._match(TokenType.ELSE): 6579 default = self._parse_assignment() 6580 6581 if not self._match(TokenType.END): 6582 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6583 default 
= exp.column("interval") 6584 else: 6585 self.raise_error("Expected END after CASE", self._prev) 6586 6587 return self.expression( 6588 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6589 ) 6590 6591 def _parse_if(self) -> t.Optional[exp.Expression]: 6592 if self._match(TokenType.L_PAREN): 6593 args = self._parse_csv( 6594 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6595 ) 6596 this = self.validate_expression(exp.If.from_arg_list(args), args) 6597 self._match_r_paren() 6598 else: 6599 index = self._index - 1 6600 6601 if self.NO_PAREN_IF_COMMANDS and index == 0: 6602 return self._parse_as_command(self._prev) 6603 6604 condition = self._parse_assignment() 6605 6606 if not condition: 6607 self._retreat(index) 6608 return None 6609 6610 self._match(TokenType.THEN) 6611 true = self._parse_assignment() 6612 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6613 self._match(TokenType.END) 6614 this = self.expression(exp.If, this=condition, true=true, false=false) 6615 6616 return this 6617 6618 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6619 if not self._match_text_seq("VALUE", "FOR"): 6620 self._retreat(self._index - 1) 6621 return None 6622 6623 return self.expression( 6624 exp.NextValueFor, 6625 this=self._parse_column(), 6626 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6627 ) 6628 6629 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6630 this = self._parse_function() or self._parse_var_or_string(upper=True) 6631 6632 if self._match(TokenType.FROM): 6633 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6634 6635 if not self._match(TokenType.COMMA): 6636 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6637 6638 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6639 6640 def _parse_gap_fill(self) -> exp.GapFill: 6641 self._match(TokenType.TABLE) 6642 this = self._parse_table() 6643 6644 self._match(TokenType.COMMA) 6645 args = [this, *self._parse_csv(self._parse_lambda)] 6646 6647 gap_fill = exp.GapFill.from_arg_list(args) 6648 return self.validate_expression(gap_fill, args) 6649 6650 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6651 this = self._parse_assignment() 6652 6653 if not self._match(TokenType.ALIAS): 6654 if self._match(TokenType.COMMA): 6655 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6656 6657 self.raise_error("Expected AS after CAST") 6658 6659 fmt = None 6660 to = self._parse_types() 6661 6662 default = self._match(TokenType.DEFAULT) 6663 if default: 6664 default = self._parse_bitwise() 6665 self._match_text_seq("ON", "CONVERSION", "ERROR") 6666 6667 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6668 fmt_string = self._parse_string() 6669 fmt = self._parse_at_time_zone(fmt_string) 6670 6671 if not to: 6672 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6673 if to.this in exp.DataType.TEMPORAL_TYPES: 6674 this = self.expression( 6675 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6676 this=this, 6677 format=exp.Literal.string( 6678 format_time( 6679 fmt_string.this if fmt_string else "", 6680 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6681 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6682 ) 6683 ), 6684 safe=safe, 6685 ) 6686 6687 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6688 this.set("zone", 
fmt.args["zone"]) 6689 return this 6690 elif not to: 6691 self.raise_error("Expected TYPE after CAST") 6692 elif isinstance(to, exp.Identifier): 6693 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6694 elif to.this == exp.DataType.Type.CHAR: 6695 if self._match(TokenType.CHARACTER_SET): 6696 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6697 6698 return self.build_cast( 6699 strict=strict, 6700 this=this, 6701 to=to, 6702 format=fmt, 6703 safe=safe, 6704 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6705 default=default, 6706 ) 6707 6708 def _parse_string_agg(self) -> exp.GroupConcat: 6709 if self._match(TokenType.DISTINCT): 6710 args: t.List[t.Optional[exp.Expression]] = [ 6711 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6712 ] 6713 if self._match(TokenType.COMMA): 6714 args.extend(self._parse_csv(self._parse_assignment)) 6715 else: 6716 args = self._parse_csv(self._parse_assignment) # type: ignore 6717 6718 if self._match_text_seq("ON", "OVERFLOW"): 6719 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6720 if self._match_text_seq("ERROR"): 6721 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6722 else: 6723 self._match_text_seq("TRUNCATE") 6724 on_overflow = self.expression( 6725 exp.OverflowTruncateBehavior, 6726 this=self._parse_string(), 6727 with_count=( 6728 self._match_text_seq("WITH", "COUNT") 6729 or not self._match_text_seq("WITHOUT", "COUNT") 6730 ), 6731 ) 6732 else: 6733 on_overflow = None 6734 6735 index = self._index 6736 if not self._match(TokenType.R_PAREN) and args: 6737 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6738 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6739 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6740 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6741 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6742 6743 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6744 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6745 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
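        # Illustrative sketch (assuming the public sqlglot.parse_one API, which is outside this
        # method): both shapes below end up as exp.GroupConcat, which is why the order clause is
        # folded into `this` instead of being kept as a separate exp.WithinGroup node:
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT STRING_AGG(x, ',' ORDER BY y) FROM t", read="postgres")
        #   >>> sqlglot.parse_one("SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) FROM t", read="trino")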
6746 if not self._match_text_seq("WITHIN", "GROUP"): 6747 self._retreat(index) 6748 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6749 6750 # The corresponding match_r_paren will be called in parse_function (caller) 6751 self._match_l_paren() 6752 6753 return self.expression( 6754 exp.GroupConcat, 6755 this=self._parse_order(this=seq_get(args, 0)), 6756 separator=seq_get(args, 1), 6757 on_overflow=on_overflow, 6758 ) 6759 6760 def _parse_convert( 6761 self, strict: bool, safe: t.Optional[bool] = None 6762 ) -> t.Optional[exp.Expression]: 6763 this = self._parse_bitwise() 6764 6765 if self._match(TokenType.USING): 6766 to: t.Optional[exp.Expression] = self.expression( 6767 exp.CharacterSet, this=self._parse_var() 6768 ) 6769 elif self._match(TokenType.COMMA): 6770 to = self._parse_types() 6771 else: 6772 to = None 6773 6774 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6775 6776 def _parse_xml_table(self) -> exp.XMLTable: 6777 namespaces = None 6778 passing = None 6779 columns = None 6780 6781 if self._match_text_seq("XMLNAMESPACES", "("): 6782 namespaces = self._parse_xml_namespace() 6783 self._match_text_seq(")", ",") 6784 6785 this = self._parse_string() 6786 6787 if self._match_text_seq("PASSING"): 6788 # The BY VALUE keywords are optional and are provided for semantic clarity 6789 self._match_text_seq("BY", "VALUE") 6790 passing = self._parse_csv(self._parse_column) 6791 6792 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6793 6794 if self._match_text_seq("COLUMNS"): 6795 columns = self._parse_csv(self._parse_field_def) 6796 6797 return self.expression( 6798 exp.XMLTable, 6799 this=this, 6800 namespaces=namespaces, 6801 passing=passing, 6802 columns=columns, 6803 by_ref=by_ref, 6804 ) 6805 6806 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6807 namespaces = [] 6808 6809 while True: 6810 if self._match(TokenType.DEFAULT): 6811 uri = self._parse_string() 6812 else: 6813 uri = self._parse_alias(self._parse_string()) 6814 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6815 if not self._match(TokenType.COMMA): 6816 break 6817 6818 return namespaces 6819 6820 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6821 args = self._parse_csv(self._parse_assignment) 6822 6823 if len(args) < 3: 6824 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6825 6826 return self.expression(exp.DecodeCase, expressions=args) 6827 6828 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6829 self._match_text_seq("KEY") 6830 key = self._parse_column() 6831 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6832 self._match_text_seq("VALUE") 6833 value = self._parse_bitwise() 6834 6835 if not key and not value: 6836 return None 6837 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6838 6839 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6840 if not this or not self._match_text_seq("FORMAT", "JSON"): 6841 return this 6842 6843 return self.expression(exp.FormatJson, this=this) 6844 6845 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6846 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6847 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6848 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6849 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6850 else: 6851 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6852 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6853 6854 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6855 6856 if not empty and not error and not null: 6857 return None 6858 6859 return self.expression( 6860 exp.OnCondition, 6861 empty=empty, 6862 error=error, 6863 null=null, 6864 ) 6865 6866 def _parse_on_handling( 6867 self, on: str, *values: str 6868 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6869 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6870 for value in values: 6871 if self._match_text_seq(value, "ON", on): 6872 return f"{value} ON {on}" 6873 6874 index = self._index 6875 if self._match(TokenType.DEFAULT): 6876 default_value = self._parse_bitwise() 6877 if self._match_text_seq("ON", on): 6878 return default_value 6879 6880 self._retreat(index) 6881 6882 return None 6883 6884 @t.overload 6885 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6886 6887 @t.overload 6888 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6889 6890 def _parse_json_object(self, agg=False): 6891 star = self._parse_star() 6892 expressions = ( 6893 [star] 6894 if star 6895 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6896 ) 6897 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6898 6899 unique_keys = None 6900 if self._match_text_seq("WITH", "UNIQUE"): 6901 unique_keys = True 6902 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6903 unique_keys = False 6904 6905 self._match_text_seq("KEYS") 6906 6907 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6908 self._parse_type() 6909 ) 6910 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6911 6912 return self.expression( 6913 exp.JSONObjectAgg if agg else exp.JSONObject, 6914 expressions=expressions, 6915 null_handling=null_handling, 6916 unique_keys=unique_keys, 6917 return_type=return_type, 6918 encoding=encoding, 6919 ) 6920 6921 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6922 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6923 if not self._match_text_seq("NESTED"): 6924 this = self._parse_id_var() 6925 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 6926 kind = self._parse_types(allow_identifiers=False) 6927 nested = None 6928 else: 6929 this = None 6930 ordinality = None 6931 kind = None 6932 nested = True 6933 6934 path = self._match_text_seq("PATH") and self._parse_string() 6935 nested_schema = nested and self._parse_json_schema() 6936 6937 return self.expression( 6938 exp.JSONColumnDef, 6939 this=this, 6940 kind=kind, 6941 path=path, 6942 nested_schema=nested_schema, 6943 ordinality=ordinality, 6944 ) 6945 6946 def _parse_json_schema(self) -> exp.JSONSchema: 6947 self._match_text_seq("COLUMNS") 6948 return self.expression( 6949 exp.JSONSchema, 6950 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6951 ) 6952 6953 def _parse_json_table(self) -> exp.JSONTable: 6954 this = self._parse_format_json(self._parse_bitwise()) 6955 path = self._match(TokenType.COMMA) and self._parse_string() 6956 error_handling =
self._parse_on_handling("ERROR", "ERROR", "NULL") 6957 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6958 schema = self._parse_json_schema() 6959 6960 return exp.JSONTable( 6961 this=this, 6962 schema=schema, 6963 path=path, 6964 error_handling=error_handling, 6965 empty_handling=empty_handling, 6966 ) 6967 6968 def _parse_match_against(self) -> exp.MatchAgainst: 6969 if self._match_text_seq("TABLE"): 6970 # parse SingleStore MATCH(TABLE ...) syntax 6971 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6972 expressions = [] 6973 table = self._parse_table() 6974 if table: 6975 expressions = [table] 6976 else: 6977 expressions = self._parse_csv(self._parse_column) 6978 6979 self._match_text_seq(")", "AGAINST", "(") 6980 6981 this = self._parse_string() 6982 6983 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6984 modifier = "IN NATURAL LANGUAGE MODE" 6985 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6986 modifier = f"{modifier} WITH QUERY EXPANSION" 6987 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6988 modifier = "IN BOOLEAN MODE" 6989 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6990 modifier = "WITH QUERY EXPANSION" 6991 else: 6992 modifier = None 6993 6994 return self.expression( 6995 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6996 ) 6997 6998 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6999 def _parse_open_json(self) -> exp.OpenJSON: 7000 this = self._parse_bitwise() 7001 path = self._match(TokenType.COMMA) and self._parse_string() 7002 7003 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7004 this = self._parse_field(any_token=True) 7005 kind = self._parse_types() 7006 path = self._parse_string() 7007 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7008 7009 return self.expression( 7010 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 7011 ) 7012 7013 expressions = None 7014 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7015 self._match_l_paren() 7016 expressions = self._parse_csv(_parse_open_json_column_def) 7017 7018 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7019 7020 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7021 args = self._parse_csv(self._parse_bitwise) 7022 7023 if self._match(TokenType.IN): 7024 return self.expression( 7025 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7026 ) 7027 7028 if haystack_first: 7029 haystack = seq_get(args, 0) 7030 needle = seq_get(args, 1) 7031 else: 7032 haystack = seq_get(args, 1) 7033 needle = seq_get(args, 0) 7034 7035 return self.expression( 7036 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7037 ) 7038 7039 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7040 args = self._parse_csv(self._parse_table) 7041 return exp.JoinHint(this=func_name.upper(), expressions=args) 7042 7043 def _parse_substring(self) -> exp.Substring: 7044 # Postgres supports the form: substring(string [from int] [for int]) 7045 # (despite being undocumented, the reverse order also works) 7046 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7047 7048 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7049 7050 start, length = None, None 7051 7052 while self._curr: 7053 if self._match(TokenType.FROM): 7054 start = 
self._parse_bitwise() 7055 elif self._match(TokenType.FOR): 7056 if not start: 7057 start = exp.Literal.number(1) 7058 length = self._parse_bitwise() 7059 else: 7060 break 7061 7062 if start: 7063 args.append(start) 7064 if length: 7065 args.append(length) 7066 7067 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7068 7069 def _parse_trim(self) -> exp.Trim: 7070 # https://www.w3resource.com/sql/character-functions/trim.php 7071 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7072 7073 position = None 7074 collation = None 7075 expression = None 7076 7077 if self._match_texts(self.TRIM_TYPES): 7078 position = self._prev.text.upper() 7079 7080 this = self._parse_bitwise() 7081 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7082 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7083 expression = self._parse_bitwise() 7084 7085 if invert_order: 7086 this, expression = expression, this 7087 7088 if self._match(TokenType.COLLATE): 7089 collation = self._parse_bitwise() 7090 7091 return self.expression( 7092 exp.Trim, this=this, position=position, expression=expression, collation=collation 7093 ) 7094 7095 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7096 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7097 7098 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7099 return self._parse_window(self._parse_id_var(), alias=True) 7100 7101 def _parse_respect_or_ignore_nulls( 7102 self, this: t.Optional[exp.Expression] 7103 ) -> t.Optional[exp.Expression]: 7104 if self._match_text_seq("IGNORE", "NULLS"): 7105 return self.expression(exp.IgnoreNulls, this=this) 7106 if self._match_text_seq("RESPECT", "NULLS"): 7107 return self.expression(exp.RespectNulls, this=this) 7108 return this 7109 7110 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7111 if self._match(TokenType.HAVING): 7112 self._match_texts(("MAX", "MIN")) 7113 max = self._prev.text.upper() != "MIN" 7114 return self.expression( 7115 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7116 ) 7117 7118 return this 7119 7120 def _parse_window( 7121 self, this: t.Optional[exp.Expression], alias: bool = False 7122 ) -> t.Optional[exp.Expression]: 7123 func = this 7124 comments = func.comments if isinstance(func, exp.Expression) else None 7125 7126 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7127 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7128 if self._match_text_seq("WITHIN", "GROUP"): 7129 order = self._parse_wrapped(self._parse_order) 7130 this = self.expression(exp.WithinGroup, this=this, expression=order) 7131 7132 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7133 self._match(TokenType.WHERE) 7134 this = self.expression( 7135 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7136 ) 7137 self._match_r_paren() 7138 7139 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7140 # Some dialects choose to implement and some do not. 7141 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7142 7143 # There is some code above in _parse_lambda that handles 7144 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7145 7146 # The below changes handle 7147 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
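        # Illustrative SQL for the two placements (support is dialect-dependent):
        #   SELECT FIRST_VALUE(t.x IGNORE NULLS) OVER (ORDER BY t.y) FROM t   -- inside the call
        #   SELECT FIRST_VALUE(t.x) IGNORE NULLS OVER (ORDER BY t.y) FROM t   -- after the call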
7148 7149 # Oracle allows both formats 7150 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7151 # and Snowflake chose to do the same for familiarity 7152 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7153 if isinstance(this, exp.AggFunc): 7154 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7155 7156 if ignore_respect and ignore_respect is not this: 7157 ignore_respect.replace(ignore_respect.this) 7158 this = self.expression(ignore_respect.__class__, this=this) 7159 7160 this = self._parse_respect_or_ignore_nulls(this) 7161 7162 # bigquery select from window x AS (partition by ...) 7163 if alias: 7164 over = None 7165 self._match(TokenType.ALIAS) 7166 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7167 return this 7168 else: 7169 over = self._prev.text.upper() 7170 7171 if comments and isinstance(func, exp.Expression): 7172 func.pop_comments() 7173 7174 if not self._match(TokenType.L_PAREN): 7175 return self.expression( 7176 exp.Window, 7177 comments=comments, 7178 this=this, 7179 alias=self._parse_id_var(False), 7180 over=over, 7181 ) 7182 7183 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7184 7185 first = self._match(TokenType.FIRST) 7186 if self._match_text_seq("LAST"): 7187 first = False 7188 7189 partition, order = self._parse_partition_and_order() 7190 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7191 7192 if kind: 7193 self._match(TokenType.BETWEEN) 7194 start = self._parse_window_spec() 7195 7196 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7197 exclude = ( 7198 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7199 if self._match_text_seq("EXCLUDE") 7200 else None 7201 ) 7202 7203 spec = self.expression( 7204 exp.WindowSpec, 7205 kind=kind, 7206 start=start["value"], 7207 start_side=start["side"], 7208 end=end.get("value"), 7209 end_side=end.get("side"), 7210 exclude=exclude, 7211 ) 7212 else: 7213 spec = None 7214 7215 self._match_r_paren() 7216 7217 window = self.expression( 7218 exp.Window, 7219 comments=comments, 7220 this=this, 7221 partition_by=partition, 7222 order=order, 7223 spec=spec, 7224 alias=window_alias, 7225 over=over, 7226 first=first, 7227 ) 7228 7229 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
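        # e.g. the illustrative Oracle query MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER
        # (PARTITION BY z): the KEEP window was built above, and the trailing OVER re-enters
        # this method with that window as `this`.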
7230 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7231 return self._parse_window(window, alias=alias) 7232 7233 return window 7234 7235 def _parse_partition_and_order( 7236 self, 7237 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7238 return self._parse_partition_by(), self._parse_order() 7239 7240 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7241 self._match(TokenType.BETWEEN) 7242 7243 return { 7244 "value": ( 7245 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7246 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7247 or self._parse_type() 7248 ), 7249 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7250 } 7251 7252 def _parse_alias( 7253 self, this: t.Optional[exp.Expression], explicit: bool = False 7254 ) -> t.Optional[exp.Expression]: 7255 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7256 # so this section tries to parse the clause version and if it fails, it treats the token 7257 # as an identifier (alias) 7258 if self._can_parse_limit_or_offset(): 7259 return this 7260 7261 any_token = self._match(TokenType.ALIAS) 7262 comments = self._prev_comments or [] 7263 7264 if explicit and not any_token: 7265 return this 7266 7267 if self._match(TokenType.L_PAREN): 7268 aliases = self.expression( 7269 exp.Aliases, 7270 comments=comments, 7271 this=this, 7272 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7273 ) 7274 self._match_r_paren(aliases) 7275 return aliases 7276 7277 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7278 self.STRING_ALIASES and self._parse_string_as_identifier() 7279 ) 7280 7281 if alias: 7282 comments.extend(alias.pop_comments()) 7283 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7284 column = this.this 7285 7286 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7287 if not this.comments and column and column.comments: 7288 this.comments = column.pop_comments() 7289 7290 return this 7291 7292 def _parse_id_var( 7293 self, 7294 any_token: bool = True, 7295 tokens: t.Optional[t.Collection[TokenType]] = None, 7296 ) -> t.Optional[exp.Expression]: 7297 expression = self._parse_identifier() 7298 if not expression and ( 7299 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7300 ): 7301 quoted = self._prev.token_type == TokenType.STRING 7302 expression = self._identifier_expression(quoted=quoted) 7303 7304 return expression 7305 7306 def _parse_string(self) -> t.Optional[exp.Expression]: 7307 if self._match_set(self.STRING_PARSERS): 7308 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7309 return self._parse_placeholder() 7310 7311 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7312 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7313 if output: 7314 output.update_positions(self._prev) 7315 return output 7316 7317 def _parse_number(self) -> t.Optional[exp.Expression]: 7318 if self._match_set(self.NUMERIC_PARSERS): 7319 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7320 return self._parse_placeholder() 7321 7322 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7323 if self._match(TokenType.IDENTIFIER): 7324 return self._identifier_expression(quoted=True) 7325 return self._parse_placeholder() 7326 7327 def _parse_var( 7328 self, 7329 any_token: bool = False, 7330 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7331 upper: bool = False, 7332 ) -> t.Optional[exp.Expression]: 7333 if ( 7334 (any_token and self._advance_any()) 7335 or self._match(TokenType.VAR) 7336 or (self._match_set(tokens) if tokens else False) 7337 ): 7338 return self.expression( 7339 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7340 ) 7341 return self._parse_placeholder() 7342 7343 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7344 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7345 self._advance() 7346 return self._prev 7347 return None 7348 7349 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7350 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7351 7352 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7353 return self._parse_primary() or self._parse_var(any_token=True) 7354 7355 def _parse_null(self) -> t.Optional[exp.Expression]: 7356 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7357 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7358 return self._parse_placeholder() 7359 7360 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7361 if self._match(TokenType.TRUE): 7362 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7363 if self._match(TokenType.FALSE): 7364 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7365 return self._parse_placeholder() 7366 7367 def _parse_star(self) -> t.Optional[exp.Expression]: 7368 if self._match(TokenType.STAR): 7369 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7370 return self._parse_placeholder() 7371 7372 def _parse_parameter(self) -> exp.Parameter: 7373 this = self._parse_identifier() or self._parse_primary_or_var() 7374 return self.expression(exp.Parameter, this=this) 7375 7376 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7377 if self._match_set(self.PLACEHOLDER_PARSERS): 7378 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7379 if placeholder: 7380 return placeholder 7381 self._advance(-1) 7382 return None 7383 7384 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7385 if not self._match_texts(keywords): 7386 return None 7387 if self._match(TokenType.L_PAREN, advance=False): 7388 return self._parse_wrapped_csv(self._parse_expression) 7389 7390 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7391 return [expression] if expression else None 7392 7393 def _parse_csv( 7394 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7395 ) -> t.List[exp.Expression]: 7396 parse_result = parse_method() 7397 items = [parse_result] if parse_result is not None else [] 7398 7399 while self._match(sep): 7400 self._add_comments(parse_result) 7401 parse_result = parse_method() 7402 if parse_result is not None: 7403 items.append(parse_result) 7404 7405 return items 7406 7407 def _parse_tokens( 7408 self, parse_method: t.Callable, expressions: t.Dict 7409 ) -> t.Optional[exp.Expression]: 7410 this = parse_method() 7411 7412 while self._match_set(expressions): 7413 this = self.expression( 7414 expressions[self._prev.token_type], 7415 this=this, 7416 comments=self._prev_comments, 7417 expression=parse_method(), 7418 ) 7419 7420 return this 7421 7422 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7423 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7424 7425 def 
_parse_wrapped_csv( 7426 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7427 ) -> t.List[exp.Expression]: 7428 return self._parse_wrapped( 7429 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7430 ) 7431 7432 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7433 wrapped = self._match(TokenType.L_PAREN) 7434 if not wrapped and not optional: 7435 self.raise_error("Expecting (") 7436 parse_result = parse_method() 7437 if wrapped: 7438 self._match_r_paren() 7439 return parse_result 7440 7441 def _parse_expressions(self) -> t.List[exp.Expression]: 7442 return self._parse_csv(self._parse_expression) 7443 7444 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7445 return ( 7446 self._parse_set_operations( 7447 self._parse_alias(self._parse_assignment(), explicit=True) 7448 if alias 7449 else self._parse_assignment() 7450 ) 7451 or self._parse_select() 7452 ) 7453 7454 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7455 return self._parse_query_modifiers( 7456 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7457 ) 7458 7459 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7460 this = None 7461 if self._match_texts(self.TRANSACTION_KIND): 7462 this = self._prev.text 7463 7464 self._match_texts(("TRANSACTION", "WORK")) 7465 7466 modes = [] 7467 while True: 7468 mode = [] 7469 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7470 mode.append(self._prev.text) 7471 7472 if mode: 7473 modes.append(" ".join(mode)) 7474 if not self._match(TokenType.COMMA): 7475 break 7476 7477 return self.expression(exp.Transaction, this=this, modes=modes) 7478 7479 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7480 chain = None 7481 savepoint = None 7482 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7483 7484 self._match_texts(("TRANSACTION", "WORK")) 7485 7486 if self._match_text_seq("TO"): 7487 self._match_text_seq("SAVEPOINT") 7488 savepoint = self._parse_id_var() 7489 7490 if self._match(TokenType.AND): 7491 chain = not self._match_text_seq("NO") 7492 self._match_text_seq("CHAIN") 7493 7494 if is_rollback: 7495 return self.expression(exp.Rollback, savepoint=savepoint) 7496 7497 return self.expression(exp.Commit, chain=chain) 7498 7499 def _parse_refresh(self) -> exp.Refresh: 7500 self._match(TokenType.TABLE) 7501 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7502 7503 def _parse_column_def_with_exists(self): 7504 start = self._index 7505 self._match(TokenType.COLUMN) 7506 7507 exists_column = self._parse_exists(not_=True) 7508 expression = self._parse_field_def() 7509 7510 if not isinstance(expression, exp.ColumnDef): 7511 self._retreat(start) 7512 return None 7513 7514 expression.set("exists", exists_column) 7515 7516 return expression 7517 7518 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7519 if not self._prev.text.upper() == "ADD": 7520 return None 7521 7522 expression = self._parse_column_def_with_exists() 7523 if not expression: 7524 return None 7525 7526 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7527 if self._match_texts(("FIRST", "AFTER")): 7528 position = self._prev.text 7529 column_position = self.expression( 7530 exp.ColumnPosition, this=self._parse_column(), position=position 7531 ) 7532 expression.set("position", column_position) 7533 7534 return 
expression 7535 7536 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7537 drop = self._match(TokenType.DROP) and self._parse_drop() 7538 if drop and not isinstance(drop, exp.Command): 7539 drop.set("kind", drop.args.get("kind", "COLUMN")) 7540 return drop 7541 7542 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7543 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7544 return self.expression( 7545 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7546 ) 7547 7548 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7549 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7550 self._match_text_seq("ADD") 7551 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7552 return self.expression( 7553 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7554 ) 7555 7556 column_def = self._parse_add_column() 7557 if isinstance(column_def, exp.ColumnDef): 7558 return column_def 7559 7560 exists = self._parse_exists(not_=True) 7561 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7562 return self.expression( 7563 exp.AddPartition, 7564 exists=exists, 7565 this=self._parse_field(any_token=True), 7566 location=self._match_text_seq("LOCATION", advance=False) 7567 and self._parse_property(), 7568 ) 7569 7570 return None 7571 7572 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7573 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7574 or self._match_text_seq("COLUMNS") 7575 ): 7576 schema = self._parse_schema() 7577 7578 return ( 7579 ensure_list(schema) 7580 if schema 7581 else self._parse_csv(self._parse_column_def_with_exists) 7582 ) 7583 7584 return self._parse_csv(_parse_add_alteration) 7585 7586 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7587 if self._match_texts(self.ALTER_ALTER_PARSERS): 7588 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7589 7590 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7591 # keyword after ALTER we default to parsing this statement 7592 self._match(TokenType.COLUMN) 7593 column = self._parse_field(any_token=True) 7594 7595 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7596 return self.expression(exp.AlterColumn, this=column, drop=True) 7597 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7598 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7599 if self._match(TokenType.COMMENT): 7600 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7601 if self._match_text_seq("DROP", "NOT", "NULL"): 7602 return self.expression( 7603 exp.AlterColumn, 7604 this=column, 7605 drop=True, 7606 allow_null=True, 7607 ) 7608 if self._match_text_seq("SET", "NOT", "NULL"): 7609 return self.expression( 7610 exp.AlterColumn, 7611 this=column, 7612 allow_null=False, 7613 ) 7614 7615 if self._match_text_seq("SET", "VISIBLE"): 7616 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7617 if self._match_text_seq("SET", "INVISIBLE"): 7618 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7619 7620 self._match_text_seq("SET", "DATA") 7621 self._match_text_seq("TYPE") 7622 return self.expression( 7623 exp.AlterColumn, 7624 this=column, 7625 dtype=self._parse_types(), 7626 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7627 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7628 ) 7629 7630 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7631 if self._match_texts(("ALL", "EVEN", "AUTO")): 7632 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7633 7634 self._match_text_seq("KEY", "DISTKEY") 7635 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7636 7637 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7638 if compound: 7639 self._match_text_seq("SORTKEY") 7640 7641 if self._match(TokenType.L_PAREN, advance=False): 7642 return self.expression( 7643 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7644 ) 7645 7646 self._match_texts(("AUTO", "NONE")) 7647 return self.expression( 7648 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7649 ) 7650 7651 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7652 index = self._index - 1 7653 7654 partition_exists = self._parse_exists() 7655 if self._match(TokenType.PARTITION, advance=False): 7656 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7657 7658 self._retreat(index) 7659 return self._parse_csv(self._parse_drop_column) 7660 7661 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7662 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7663 exists = self._parse_exists() 7664 old_column = self._parse_column() 7665 to = self._match_text_seq("TO") 7666 new_column = self._parse_column() 7667 7668 if old_column is None or to is None or new_column is None: 7669 return None 7670 7671 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7672 7673 self._match_text_seq("TO") 7674 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7675 7676 def _parse_alter_table_set(self) -> exp.AlterSet: 7677 alter_set = self.expression(exp.AlterSet) 7678 7679 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7680 "TABLE", "PROPERTIES" 7681 ): 7682 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7683 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7684 alter_set.set("expressions", [self._parse_assignment()]) 7685 elif self._match_texts(("LOGGED", "UNLOGGED")): 7686 alter_set.set("option", exp.var(self._prev.text.upper())) 7687 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7688 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7689 elif self._match_text_seq("LOCATION"): 7690 alter_set.set("location", self._parse_field()) 7691 elif self._match_text_seq("ACCESS", "METHOD"): 7692 alter_set.set("access_method", self._parse_field()) 7693 elif self._match_text_seq("TABLESPACE"): 7694 alter_set.set("tablespace", self._parse_field()) 7695 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7696 alter_set.set("file_format", [self._parse_field()]) 7697 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7698 alter_set.set("file_format", self._parse_wrapped_options()) 7699 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7700 alter_set.set("copy_options", self._parse_wrapped_options()) 7701 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7702 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7703 else: 7704 if self._match_text_seq("SERDE"): 7705 alter_set.set("serde", self._parse_field()) 7706 7707 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7708 alter_set.set("expressions", [properties]) 7709 7710 return alter_set 7711 7712 def _parse_alter_session(self) -> exp.AlterSession: 7713 """Parse ALTER SESSION SET/UNSET statements.""" 7714 if self._match(TokenType.SET): 7715 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7716 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7717 7718 self._match_text_seq("UNSET") 7719 expressions = self._parse_csv( 7720 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7721 ) 7722 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7723 7724 def _parse_alter(self) -> exp.Alter | exp.Command: 7725 start = self._prev 7726 7727 alter_token = self._match_set(self.ALTERABLES) and self._prev 7728 if not alter_token: 7729 return self._parse_as_command(start) 7730 7731 exists = self._parse_exists() 7732 only = self._match_text_seq("ONLY") 7733 7734 if alter_token.token_type == TokenType.SESSION: 7735 this = None 7736 check = None 7737 cluster = None 7738 else: 7739 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 7740 check = self._match_text_seq("WITH", "CHECK") 7741 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7742 7743 if self._next: 7744 self._advance() 7745 7746 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7747 if parser: 7748 actions = ensure_list(parser(self)) 7749 not_valid = self._match_text_seq("NOT", "VALID") 7750 options = self._parse_csv(self._parse_property) 7751 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 7752 7753 if not self._curr and actions: 7754 return self.expression( 7755 exp.Alter, 7756 this=this, 7757 kind=alter_token.text.upper(), 7758 exists=exists, 7759 actions=actions, 7760 only=only, 7761 options=options, 7762 cluster=cluster, 7763 not_valid=not_valid, 7764 check=check, 7765 cascade=cascade, 7766 ) 7767 7768 return self._parse_as_command(start) 7769 7770 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7771 start = self._prev 7772 # https://duckdb.org/docs/sql/statements/analyze 7773 if not self._curr: 7774 return self.expression(exp.Analyze) 7775 7776 options = [] 7777 while self._match_texts(self.ANALYZE_STYLES): 7778 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7779 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7780 else: 7781 options.append(self._prev.text.upper()) 7782 7783 this: t.Optional[exp.Expression] = None 7784 inner_expression: t.Optional[exp.Expression] = None 7785 7786 kind = self._curr and self._curr.text.upper() 7787 7788 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7789 this = self._parse_table_parts() 7790 elif self._match_text_seq("TABLES"): 7791 if self._match_set((TokenType.FROM, TokenType.IN)): 7792 kind = f"{kind} {self._prev.text.upper()}" 7793 this = self._parse_table(schema=True, is_db_reference=True) 7794 elif self._match_text_seq("DATABASE"): 7795 this = self._parse_table(schema=True, is_db_reference=True) 7796 elif self._match_text_seq("CLUSTER"): 7797 this = self._parse_table() 7798 # Try matching inner expr keywords before fallback to parse table. 
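        # For instance, ANALYZE VERBOSE some_table (Postgres-style, illustrative) falls through
        # to the final branch and is parsed as a plain table reference, whereas a next keyword
        # registered in ANALYZE_EXPRESSION_PARSERS dispatches to the matching _parse_analyze_*
        # helper instead.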
7799 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7800 kind = None 7801 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7802 else: 7803 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7804 kind = None 7805 this = self._parse_table_parts() 7806 7807 partition = self._try_parse(self._parse_partition) 7808 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7809 return self._parse_as_command(start) 7810 7811 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7812 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7813 "WITH", "ASYNC", "MODE" 7814 ): 7815 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7816 else: 7817 mode = None 7818 7819 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7820 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7821 7822 properties = self._parse_properties() 7823 return self.expression( 7824 exp.Analyze, 7825 kind=kind, 7826 this=this, 7827 mode=mode, 7828 partition=partition, 7829 properties=properties, 7830 expression=inner_expression, 7831 options=options, 7832 ) 7833 7834 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7835 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7836 this = None 7837 kind = self._prev.text.upper() 7838 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7839 expressions = [] 7840 7841 if not self._match_text_seq("STATISTICS"): 7842 self.raise_error("Expecting token STATISTICS") 7843 7844 if self._match_text_seq("NOSCAN"): 7845 this = "NOSCAN" 7846 elif self._match(TokenType.FOR): 7847 if self._match_text_seq("ALL", "COLUMNS"): 7848 this = "FOR ALL COLUMNS" 7849 if self._match_texts("COLUMNS"): 7850 this = "FOR COLUMNS" 7851 expressions = self._parse_csv(self._parse_column_reference) 7852 elif self._match_text_seq("SAMPLE"): 7853 sample = self._parse_number() 7854 expressions = [ 7855 self.expression( 7856 exp.AnalyzeSample, 7857 sample=sample, 7858 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7859 ) 7860 ] 7861 7862 return self.expression( 7863 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7864 ) 7865 7866 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7867 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7868 kind = None 7869 this = None 7870 expression: t.Optional[exp.Expression] = None 7871 if self._match_text_seq("REF", "UPDATE"): 7872 kind = "REF" 7873 this = "UPDATE" 7874 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7875 this = "UPDATE SET DANGLING TO NULL" 7876 elif self._match_text_seq("STRUCTURE"): 7877 kind = "STRUCTURE" 7878 if self._match_text_seq("CASCADE", "FAST"): 7879 this = "CASCADE FAST" 7880 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7881 ("ONLINE", "OFFLINE") 7882 ): 7883 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7884 expression = self._parse_into() 7885 7886 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7887 7888 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7889 this = self._prev.text.upper() 7890 if self._match_text_seq("COLUMNS"): 7891 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7892 return None 7893 7894 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7895 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7896 if self._match_text_seq("STATISTICS"): 7897 return self.expression(exp.AnalyzeDelete, kind=kind) 7898 return None 7899 7900 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7901 if self._match_text_seq("CHAINED", "ROWS"): 7902 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7903 return None 7904 7905 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7906 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7907 this = self._prev.text.upper() 7908 expression: t.Optional[exp.Expression] = None 7909 expressions = [] 7910 update_options = None 7911 7912 if self._match_text_seq("HISTOGRAM", "ON"): 7913 expressions = self._parse_csv(self._parse_column_reference) 7914 with_expressions = [] 7915 while self._match(TokenType.WITH): 7916 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7917 if self._match_texts(("SYNC", "ASYNC")): 7918 if self._match_text_seq("MODE", advance=False): 7919 with_expressions.append(f"{self._prev.text.upper()} MODE") 7920 self._advance() 7921 else: 7922 buckets = self._parse_number() 7923 if self._match_text_seq("BUCKETS"): 7924 with_expressions.append(f"{buckets} BUCKETS") 7925 if with_expressions: 7926 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7927 7928 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7929 TokenType.UPDATE, advance=False 7930 ): 7931 update_options = self._prev.text.upper() 7932 self._advance() 7933 elif self._match_text_seq("USING", "DATA"): 7934 expression = self.expression(exp.UsingData, this=self._parse_string()) 7935 7936 return self.expression( 7937 exp.AnalyzeHistogram, 7938 this=this, 7939 expressions=expressions, 7940 expression=expression, 7941 update_options=update_options, 7942 ) 7943 7944 def _parse_merge(self) -> exp.Merge: 7945 self._match(TokenType.INTO) 7946 target = self._parse_table() 7947 7948 if target and self._match(TokenType.ALIAS, advance=False): 7949 target.set("alias", self._parse_table_alias()) 7950 7951 self._match(TokenType.USING) 7952 using = self._parse_table() 7953 7954 self._match(TokenType.ON) 7955 on = self._parse_assignment() 7956 7957 return self.expression( 7958 exp.Merge, 7959 this=target, 7960 using=using, 7961 on=on, 7962 whens=self._parse_when_matched(), 7963 returning=self._parse_returning(), 7964 ) 7965 7966 def _parse_when_matched(self) -> exp.Whens: 7967 whens = [] 7968 7969 while self._match(TokenType.WHEN): 7970 matched = not self._match(TokenType.NOT) 7971 self._match_text_seq("MATCHED") 7972 source = ( 7973 False 7974 if self._match_text_seq("BY", "TARGET") 7975 else self._match_text_seq("BY", "SOURCE") 7976 ) 7977 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7978 7979 self._match(TokenType.THEN) 7980 7981 if self._match(TokenType.INSERT): 7982 this = self._parse_star() 7983 if this: 7984 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7985 else: 7986 then = self.expression( 7987 exp.Insert, 7988 this=exp.var("ROW") 7989 if self._match_text_seq("ROW") 7990 else self._parse_value(values=False), 7991 expression=self._match_text_seq("VALUES") and self._parse_value(), 7992 ) 7993 elif self._match(TokenType.UPDATE): 7994 expressions = self._parse_star() 7995 if expressions: 7996 then = self.expression(exp.Update, expressions=expressions) 7997 else: 7998 then = self.expression( 7999 exp.Update, 8000 
expressions=self._match(TokenType.SET) 8001 and self._parse_csv(self._parse_equality), 8002 ) 8003 elif self._match(TokenType.DELETE): 8004 then = self.expression(exp.Var, this=self._prev.text) 8005 else: 8006 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8007 8008 whens.append( 8009 self.expression( 8010 exp.When, 8011 matched=matched, 8012 source=source, 8013 condition=condition, 8014 then=then, 8015 ) 8016 ) 8017 return self.expression(exp.Whens, expressions=whens) 8018 8019 def _parse_show(self) -> t.Optional[exp.Expression]: 8020 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8021 if parser: 8022 return parser(self) 8023 return self._parse_as_command(self._prev) 8024 8025 def _parse_set_item_assignment( 8026 self, kind: t.Optional[str] = None 8027 ) -> t.Optional[exp.Expression]: 8028 index = self._index 8029 8030 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8031 return self._parse_set_transaction(global_=kind == "GLOBAL") 8032 8033 left = self._parse_primary() or self._parse_column() 8034 assignment_delimiter = self._match_texts(("=", "TO")) 8035 8036 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8037 self._retreat(index) 8038 return None 8039 8040 right = self._parse_statement() or self._parse_id_var() 8041 if isinstance(right, (exp.Column, exp.Identifier)): 8042 right = exp.var(right.name) 8043 8044 this = self.expression(exp.EQ, this=left, expression=right) 8045 return self.expression(exp.SetItem, this=this, kind=kind) 8046 8047 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8048 self._match_text_seq("TRANSACTION") 8049 characteristics = self._parse_csv( 8050 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8051 ) 8052 return self.expression( 8053 exp.SetItem, 8054 expressions=characteristics, 8055 kind="TRANSACTION", 8056 **{"global": global_}, # type: ignore 8057 ) 8058 8059 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8060 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8061 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8062 8063 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8064 index = self._index 8065 set_ = self.expression( 8066 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8067 ) 8068 8069 if self._curr: 8070 self._retreat(index) 8071 return self._parse_as_command(self._prev) 8072 8073 return set_ 8074 8075 def _parse_var_from_options( 8076 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8077 ) -> t.Optional[exp.Var]: 8078 start = self._curr 8079 if not start: 8080 return None 8081 8082 option = start.text.upper() 8083 continuations = options.get(option) 8084 8085 index = self._index 8086 self._advance() 8087 for keywords in continuations or []: 8088 if isinstance(keywords, str): 8089 keywords = (keywords,) 8090 8091 if self._match_text_seq(*keywords): 8092 option = f"{option} {' '.join(keywords)}" 8093 break 8094 else: 8095 if continuations or continuations is None: 8096 if raise_unmatched: 8097 self.raise_error(f"Unknown option {option}") 8098 8099 self._retreat(index) 8100 return None 8101 8102 return exp.var(option) 8103 8104 def _parse_as_command(self, start: Token) -> exp.Command: 8105 while self._curr: 8106 self._advance() 8107 text = self._find_sql(start, self._prev) 8108 size = len(start.text) 8109 self._warn_unsupported() 8110 return exp.Command(this=text[:size], 
expression=text[size:]) 8111 8112 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8113 settings = [] 8114 8115 self._match_l_paren() 8116 kind = self._parse_id_var() 8117 8118 if self._match(TokenType.L_PAREN): 8119 while True: 8120 key = self._parse_id_var() 8121 value = self._parse_primary() 8122 if not key and value is None: 8123 break 8124 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8125 self._match(TokenType.R_PAREN) 8126 8127 self._match_r_paren() 8128 8129 return self.expression( 8130 exp.DictProperty, 8131 this=this, 8132 kind=kind.this if kind else None, 8133 settings=settings, 8134 ) 8135 8136 def _parse_dict_range(self, this: str) -> exp.DictRange: 8137 self._match_l_paren() 8138 has_min = self._match_text_seq("MIN") 8139 if has_min: 8140 min = self._parse_var() or self._parse_primary() 8141 self._match_text_seq("MAX") 8142 max = self._parse_var() or self._parse_primary() 8143 else: 8144 max = self._parse_var() or self._parse_primary() 8145 min = exp.Literal.number(0) 8146 self._match_r_paren() 8147 return self.expression(exp.DictRange, this=this, min=min, max=max) 8148 8149 def _parse_comprehension( 8150 self, this: t.Optional[exp.Expression] 8151 ) -> t.Optional[exp.Comprehension]: 8152 index = self._index 8153 expression = self._parse_column() 8154 if not self._match(TokenType.IN): 8155 self._retreat(index - 1) 8156 return None 8157 iterator = self._parse_column() 8158 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8159 return self.expression( 8160 exp.Comprehension, 8161 this=this, 8162 expression=expression, 8163 iterator=iterator, 8164 condition=condition, 8165 ) 8166 8167 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8168 if self._match(TokenType.HEREDOC_STRING): 8169 return self.expression(exp.Heredoc, this=self._prev.text) 8170 8171 if not self._match_text_seq("$"): 8172 return None 8173 8174 tags = ["$"] 8175 tag_text = None 8176 8177 if self._is_connected(): 8178 self._advance() 8179 tags.append(self._prev.text.upper()) 8180 else: 8181 self.raise_error("No closing $ found") 8182 8183 if tags[-1] != "$": 8184 if self._is_connected() and self._match_text_seq("$"): 8185 tag_text = tags[-1] 8186 tags.append("$") 8187 else: 8188 self.raise_error("No closing $ found") 8189 8190 heredoc_start = self._curr 8191 8192 while self._curr: 8193 if self._match_text_seq(*tags, advance=False): 8194 this = self._find_sql(heredoc_start, self._prev) 8195 self._advance(len(tags)) 8196 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8197 8198 self._advance() 8199 8200 self.raise_error(f"No closing {''.join(tags)} found") 8201 return None 8202 8203 def _find_parser( 8204 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8205 ) -> t.Optional[t.Callable]: 8206 if not self._curr: 8207 return None 8208 8209 index = self._index 8210 this = [] 8211 while True: 8212 # The current token might be multiple words 8213 curr = self._curr.text.upper() 8214 key = curr.split(" ") 8215 this.append(curr) 8216 8217 self._advance() 8218 result, trie = in_trie(trie, key) 8219 if result == TrieResult.FAILED: 8220 break 8221 8222 if result == TrieResult.EXISTS: 8223 subparser = parsers[" ".join(this)] 8224 return subparser 8225 8226 self._retreat(index) 8227 return None 8228 8229 def _match(self, token_type, advance=True, expression=None): 8230 if not self._curr: 8231 return None 8232 8233 if self._curr.token_type == token_type: 8234 if advance: 8235 self._advance() 8236 self._add_comments(expression) 8237 return 
True 8238 8239 return None 8240 8241 def _match_set(self, types, advance=True): 8242 if not self._curr: 8243 return None 8244 8245 if self._curr.token_type in types: 8246 if advance: 8247 self._advance() 8248 return True 8249 8250 return None 8251 8252 def _match_pair(self, token_type_a, token_type_b, advance=True): 8253 if not self._curr or not self._next: 8254 return None 8255 8256 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8257 if advance: 8258 self._advance(2) 8259 return True 8260 8261 return None 8262 8263 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8264 if not self._match(TokenType.L_PAREN, expression=expression): 8265 self.raise_error("Expecting (") 8266 8267 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8268 if not self._match(TokenType.R_PAREN, expression=expression): 8269 self.raise_error("Expecting )") 8270 8271 def _match_texts(self, texts, advance=True): 8272 if ( 8273 self._curr 8274 and self._curr.token_type != TokenType.STRING 8275 and self._curr.text.upper() in texts 8276 ): 8277 if advance: 8278 self._advance() 8279 return True 8280 return None 8281 8282 def _match_text_seq(self, *texts, advance=True): 8283 index = self._index 8284 for text in texts: 8285 if ( 8286 self._curr 8287 and self._curr.token_type != TokenType.STRING 8288 and self._curr.text.upper() == text 8289 ): 8290 self._advance() 8291 else: 8292 self._retreat(index) 8293 return None 8294 8295 if not advance: 8296 self._retreat(index) 8297 8298 return True 8299 8300 def _replace_lambda( 8301 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8302 ) -> t.Optional[exp.Expression]: 8303 if not node: 8304 return node 8305 8306 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8307 8308 for column in node.find_all(exp.Column): 8309 typ = lambda_types.get(column.parts[0].name) 8310 if typ is not None: 8311 dot_or_id = column.to_dot() if column.table else column.this 8312 8313 if typ: 8314 dot_or_id = self.expression( 8315 exp.Cast, 8316 this=dot_or_id, 8317 to=typ, 8318 ) 8319 8320 parent = column.parent 8321 8322 while isinstance(parent, exp.Dot): 8323 if not isinstance(parent.parent, exp.Dot): 8324 parent.replace(dot_or_id) 8325 break 8326 parent = parent.parent 8327 else: 8328 if column is node: 8329 node = dot_or_id 8330 else: 8331 column.replace(dot_or_id) 8332 return node 8333 8334 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8335 start = self._prev 8336 8337 # Not to be confused with TRUNCATE(number, decimals) function call 8338 if self._match(TokenType.L_PAREN): 8339 self._retreat(self._index - 2) 8340 return self._parse_function() 8341 8342 # Clickhouse supports TRUNCATE DATABASE as well 8343 is_database = self._match(TokenType.DATABASE) 8344 8345 self._match(TokenType.TABLE) 8346 8347 exists = self._parse_exists(not_=False) 8348 8349 expressions = self._parse_csv( 8350 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8351 ) 8352 8353 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8354 8355 if self._match_text_seq("RESTART", "IDENTITY"): 8356 identity = "RESTART" 8357 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8358 identity = "CONTINUE" 8359 else: 8360 identity = None 8361 8362 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8363 option = self._prev.text 8364 else: 8365 option = None 8366 8367 partition = self._parse_partition() 
8368 8369 # Fallback case 8370 if self._curr: 8371 return self._parse_as_command(start) 8372 8373 return self.expression( 8374 exp.TruncateTable, 8375 expressions=expressions, 8376 is_database=is_database, 8377 exists=exists, 8378 cluster=cluster, 8379 identity=identity, 8380 option=option, 8381 partition=partition, 8382 ) 8383 8384 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8385 this = self._parse_ordered(self._parse_opclass) 8386 8387 if not self._match(TokenType.WITH): 8388 return this 8389 8390 op = self._parse_var(any_token=True) 8391 8392 return self.expression(exp.WithOperator, this=this, op=op) 8393 8394 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8395 self._match(TokenType.EQ) 8396 self._match(TokenType.L_PAREN) 8397 8398 opts: t.List[t.Optional[exp.Expression]] = [] 8399 option: exp.Expression | None 8400 while self._curr and not self._match(TokenType.R_PAREN): 8401 if self._match_text_seq("FORMAT_NAME", "="): 8402 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8403 option = self._parse_format_name() 8404 else: 8405 option = self._parse_property() 8406 8407 if option is None: 8408 self.raise_error("Unable to parse option") 8409 break 8410 8411 opts.append(option) 8412 8413 return opts 8414 8415 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8416 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8417 8418 options = [] 8419 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8420 option = self._parse_var(any_token=True) 8421 prev = self._prev.text.upper() 8422 8423 # Different dialects might separate options and values by white space, "=" and "AS" 8424 self._match(TokenType.EQ) 8425 self._match(TokenType.ALIAS) 8426 8427 param = self.expression(exp.CopyParameter, this=option) 8428 8429 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8430 TokenType.L_PAREN, advance=False 8431 ): 8432 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8433 param.set("expressions", self._parse_wrapped_options()) 8434 elif prev == "FILE_FORMAT": 8435 # T-SQL's external file format case 8436 param.set("expression", self._parse_field()) 8437 else: 8438 param.set("expression", self._parse_unquoted_field()) 8439 8440 options.append(param) 8441 self._match(sep) 8442 8443 return options 8444 8445 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8446 expr = self.expression(exp.Credentials) 8447 8448 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8449 expr.set("storage", self._parse_field()) 8450 if self._match_text_seq("CREDENTIALS"): 8451 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8452 creds = ( 8453 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8454 ) 8455 expr.set("credentials", creds) 8456 if self._match_text_seq("ENCRYPTION"): 8457 expr.set("encryption", self._parse_wrapped_options()) 8458 if self._match_text_seq("IAM_ROLE"): 8459 expr.set("iam_role", self._parse_field()) 8460 if self._match_text_seq("REGION"): 8461 expr.set("region", self._parse_field()) 8462 8463 return expr 8464 8465 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8466 return self._parse_field() 8467 8468 def _parse_copy(self) -> exp.Copy | exp.Command: 8469 start = self._prev 8470 8471 self._match(TokenType.INTO) 8472 8473 this = ( 8474 self._parse_select(nested=True, parse_subquery_alias=False) 8475 if self._match(TokenType.L_PAREN, advance=False) 8476 else self._parse_table(schema=True) 
8477 ) 8478 8479 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8480 8481 files = self._parse_csv(self._parse_file_location) 8482 if self._match(TokenType.EQ, advance=False): 8483 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8484 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8485 # list via `_parse_wrapped(..)` below. 8486 self._advance(-1) 8487 files = [] 8488 8489 credentials = self._parse_credentials() 8490 8491 self._match_text_seq("WITH") 8492 8493 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8494 8495 # Fallback case 8496 if self._curr: 8497 return self._parse_as_command(start) 8498 8499 return self.expression( 8500 exp.Copy, 8501 this=this, 8502 kind=kind, 8503 credentials=credentials, 8504 files=files, 8505 params=params, 8506 ) 8507 8508 def _parse_normalize(self) -> exp.Normalize: 8509 return self.expression( 8510 exp.Normalize, 8511 this=self._parse_bitwise(), 8512 form=self._match(TokenType.COMMA) and self._parse_var(), 8513 ) 8514 8515 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8516 args = self._parse_csv(lambda: self._parse_lambda()) 8517 8518 this = seq_get(args, 0) 8519 decimals = seq_get(args, 1) 8520 8521 return expr_type( 8522 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8523 ) 8524 8525 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8526 star_token = self._prev 8527 8528 if self._match_text_seq("COLUMNS", "(", advance=False): 8529 this = self._parse_function() 8530 if isinstance(this, exp.Columns): 8531 this.set("unpack", True) 8532 return this 8533 8534 return self.expression( 8535 exp.Star, 8536 **{ # type: ignore 8537 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8538 "replace": self._parse_star_op("REPLACE"), 8539 "rename": self._parse_star_op("RENAME"), 8540 }, 8541 ).update_positions(star_token) 8542 8543 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8544 privilege_parts = [] 8545 8546 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8547 # (end of privilege list) or L_PAREN (start of column list) are met 8548 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8549 privilege_parts.append(self._curr.text.upper()) 8550 self._advance() 8551 8552 this = exp.var(" ".join(privilege_parts)) 8553 expressions = ( 8554 self._parse_wrapped_csv(self._parse_column) 8555 if self._match(TokenType.L_PAREN, advance=False) 8556 else None 8557 ) 8558 8559 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8560 8561 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8562 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8563 principal = self._parse_id_var() 8564 8565 if not principal: 8566 return None 8567 8568 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8569 8570 def _parse_grant_revoke_common( 8571 self, 8572 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8573 privileges = self._parse_csv(self._parse_grant_privilege) 8574 8575 self._match(TokenType.ON) 8576 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8577 8578 # Attempt to parse the securable e.g. 
MySQL allows names 8579 # such as "foo.*", "*.*" which are not easily parseable yet 8580 securable = self._try_parse(self._parse_table_parts) 8581 8582 return privileges, kind, securable 8583 8584 def _parse_grant(self) -> exp.Grant | exp.Command: 8585 start = self._prev 8586 8587 privileges, kind, securable = self._parse_grant_revoke_common() 8588 8589 if not securable or not self._match_text_seq("TO"): 8590 return self._parse_as_command(start) 8591 8592 principals = self._parse_csv(self._parse_grant_principal) 8593 8594 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8595 8596 if self._curr: 8597 return self._parse_as_command(start) 8598 8599 return self.expression( 8600 exp.Grant, 8601 privileges=privileges, 8602 kind=kind, 8603 securable=securable, 8604 principals=principals, 8605 grant_option=grant_option, 8606 ) 8607 8608 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8609 start = self._prev 8610 8611 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8612 8613 privileges, kind, securable = self._parse_grant_revoke_common() 8614 8615 if not securable or not self._match_text_seq("FROM"): 8616 return self._parse_as_command(start) 8617 8618 principals = self._parse_csv(self._parse_grant_principal) 8619 8620 cascade = None 8621 if self._match_texts(("CASCADE", "RESTRICT")): 8622 cascade = self._prev.text.upper() 8623 8624 if self._curr: 8625 return self._parse_as_command(start) 8626 8627 return self.expression( 8628 exp.Revoke, 8629 privileges=privileges, 8630 kind=kind, 8631 securable=securable, 8632 principals=principals, 8633 grant_option=grant_option, 8634 cascade=cascade, 8635 ) 8636 8637 def _parse_overlay(self) -> exp.Overlay: 8638 return self.expression( 8639 exp.Overlay, 8640 **{ # type: ignore 8641 "this": self._parse_bitwise(), 8642 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8643 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8644 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8645 }, 8646 ) 8647 8648 def _parse_format_name(self) -> exp.Property: 8649 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8650 # for FILE_FORMAT = <format_name> 8651 return self.expression( 8652 exp.Property, 8653 this=exp.var("FORMAT_NAME"), 8654 value=self._parse_string() or self._parse_table_parts(), 8655 ) 8656 8657 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8658 args: t.List[exp.Expression] = [] 8659 8660 if self._match(TokenType.DISTINCT): 8661 args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()])) 8662 self._match(TokenType.COMMA) 8663 8664 args.extend(self._parse_function_args()) 8665 8666 return self.expression( 8667 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8668 ) 8669 8670 def _identifier_expression( 8671 self, token: t.Optional[Token] = None, **kwargs: t.Any 8672 ) -> exp.Identifier: 8673 token = token or self._prev 8674 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8675 expression.update_positions(token) 8676 return expression 8677 8678 def _build_pipe_cte( 8679 self, 8680 query: exp.Query, 8681 expressions: t.List[exp.Expression], 8682 alias_cte: t.Optional[exp.TableAlias] = None, 8683 ) -> exp.Select: 8684 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8685 if alias_cte: 8686 new_cte = alias_cte 8687 else: 8688 self._pipe_cte_counter += 1 8689 new_cte = f"__tmp{self._pipe_cte_counter}" 8690 8691 with_ = query.args.get("with") 8692 
ctes = with_.pop() if with_ else None 8693 8694 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8695 if ctes: 8696 new_select.set("with", ctes) 8697 8698 return new_select.with_(new_cte, as_=query, copy=False) 8699 8700 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8701 select = self._parse_select(consume_pipe=False) 8702 if not select: 8703 return query 8704 8705 return self._build_pipe_cte( 8706 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8707 ) 8708 8709 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8710 limit = self._parse_limit() 8711 offset = self._parse_offset() 8712 if limit: 8713 curr_limit = query.args.get("limit", limit) 8714 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8715 query.limit(limit, copy=False) 8716 if offset: 8717 curr_offset = query.args.get("offset") 8718 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8719 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8720 8721 return query 8722 8723 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8724 this = self._parse_assignment() 8725 if self._match_text_seq("GROUP", "AND", advance=False): 8726 return this 8727 8728 this = self._parse_alias(this) 8729 8730 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8731 return self._parse_ordered(lambda: this) 8732 8733 return this 8734 8735 def _parse_pipe_syntax_aggregate_group_order_by( 8736 self, query: exp.Select, group_by_exists: bool = True 8737 ) -> exp.Select: 8738 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8739 aggregates_or_groups, orders = [], [] 8740 for element in expr: 8741 if isinstance(element, exp.Ordered): 8742 this = element.this 8743 if isinstance(this, exp.Alias): 8744 element.set("this", this.args["alias"]) 8745 orders.append(element) 8746 else: 8747 this = element 8748 aggregates_or_groups.append(this) 8749 8750 if group_by_exists: 8751 query.select(*aggregates_or_groups, copy=False).group_by( 8752 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8753 copy=False, 8754 ) 8755 else: 8756 query.select(*aggregates_or_groups, append=False, copy=False) 8757 8758 if orders: 8759 return query.order_by(*orders, append=False, copy=False) 8760 8761 return query 8762 8763 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8764 self._match_text_seq("AGGREGATE") 8765 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8766 8767 if self._match(TokenType.GROUP_BY) or ( 8768 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8769 ): 8770 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8771 8772 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8773 8774 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8775 first_setop = self.parse_set_operation(this=query) 8776 if not first_setop: 8777 return None 8778 8779 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8780 expr = self._parse_paren() 8781 return expr.assert_is(exp.Subquery).unnest() if expr else None 8782 8783 first_setop.this.pop() 8784 8785 setops = [ 8786 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8787 *self._parse_csv(_parse_and_unwrap_query), 8788 ] 8789 8790 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8791 with_ = query.args.get("with") 
8792 ctes = with_.pop() if with_ else None 8793 8794 if isinstance(first_setop, exp.Union): 8795 query = query.union(*setops, copy=False, **first_setop.args) 8796 elif isinstance(first_setop, exp.Except): 8797 query = query.except_(*setops, copy=False, **first_setop.args) 8798 else: 8799 query = query.intersect(*setops, copy=False, **first_setop.args) 8800 8801 query.set("with", ctes) 8802 8803 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8804 8805 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8806 join = self._parse_join() 8807 if not join: 8808 return None 8809 8810 if isinstance(query, exp.Select): 8811 return query.join(join, copy=False) 8812 8813 return query 8814 8815 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8816 pivots = self._parse_pivots() 8817 if not pivots: 8818 return query 8819 8820 from_ = query.args.get("from") 8821 if from_: 8822 from_.this.set("pivots", pivots) 8823 8824 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8825 8826 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8827 self._match_text_seq("EXTEND") 8828 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8829 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8830 8831 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8832 sample = self._parse_table_sample() 8833 8834 with_ = query.args.get("with") 8835 if with_: 8836 with_.expressions[-1].this.set("sample", sample) 8837 else: 8838 query.set("sample", sample) 8839 8840 return query 8841 8842 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8843 if isinstance(query, exp.Subquery): 8844 query = exp.select("*").from_(query, copy=False) 8845 8846 if not query.args.get("from"): 8847 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8848 8849 while self._match(TokenType.PIPE_GT): 8850 start = self._curr 8851 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8852 if not parser: 8853 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8854 # keywords, making it tricky to disambiguate them without lookahead. The approach 8855 # here is to try and parse a set operation and if that fails, then try to parse a 8856 # join operator. If that fails as well, then the operator is not supported. 
8857 parsed_query = self._parse_pipe_syntax_set_operator(query) 8858 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8859 if not parsed_query: 8860 self._retreat(start) 8861 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8862 break 8863 query = parsed_query 8864 else: 8865 query = parser(self, query) 8866 8867 return query 8868 8869 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8870 vars = self._parse_csv(self._parse_id_var) 8871 if not vars: 8872 return None 8873 8874 return self.expression( 8875 exp.DeclareItem, 8876 this=vars, 8877 kind=self._parse_types(), 8878 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8879 ) 8880 8881 def _parse_declare(self) -> exp.Declare | exp.Command: 8882 start = self._prev 8883 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8884 8885 if not expressions or self._curr: 8886 return self._parse_as_command(start) 8887 8888 return self.expression(exp.Declare, expressions=expressions) 8889 8890 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8891 exp_class = exp.Cast if strict else exp.TryCast 8892 8893 if exp_class == exp.TryCast: 8894 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8895 8896 return self.expression(exp_class, **kwargs) 8897 8898 def _parse_json_value(self) -> exp.JSONValue: 8899 this = self._parse_bitwise() 8900 self._match(TokenType.COMMA) 8901 path = self._parse_bitwise() 8902 8903 returning = self._match(TokenType.RETURNING) and self._parse_type() 8904 8905 return self.expression( 8906 exp.JSONValue, 8907 this=this, 8908 path=self.dialect.to_json_path(path), 8909 returning=returning, 8910 on_condition=self._parse_on_condition(), 8911 ) 8912 8913 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8914 def concat_exprs( 8915 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8916 ) -> exp.Expression: 8917 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8918 concat_exprs = [ 8919 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8920 ] 8921 node.set("expressions", concat_exprs) 8922 return node 8923 if len(exprs) == 1: 8924 return exprs[0] 8925 return self.expression(exp.Concat, expressions=args, safe=True) 8926 8927 args = self._parse_csv(self._parse_lambda) 8928 8929 if args: 8930 order = args[-1] if isinstance(args[-1], exp.Order) else None 8931 8932 if order: 8933 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8934 # remove 'expr' from exp.Order and add it back to args 8935 args[-1] = order.this 8936 order.set("this", concat_exprs(order.this, args)) 8937 8938 this = order or concat_exprs(args[0], args) 8939 else: 8940 this = None 8941 8942 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8943 8944 return self.expression(exp.GroupConcat, this=this, separator=separator)
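The _parse_pipe_syntax_* methods above rewrite each |> stage into a chained CTE via _build_pipe_cte, so the final tree is an ordinary SELECT over the last temporary CTE. A minimal sketch, assuming a sqlglot version and dialect with pipe syntax support (e.g. BigQuery):

import sqlglot

# Each |> operator is folded into a temporary CTE (__tmp1, __tmp2, ...) and the
# result is a plain SELECT over the last one.
tree = sqlglot.parse_one("FROM t |> WHERE a > 1 |> SELECT a", read="bigquery")
print(tree.sql("bigquery"))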
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
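A minimal usage sketch, wiring a Tokenizer to a Parser by hand (the higher-level entry points sqlglot.parse and sqlglot.parse_one do this for you):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t WHERE b > 1"
tokens = Tokenizer().tokenize(sql)

# Collect up to 5 errors before raising, instead of raising immediately
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
expressions = parser.parse(tokens, sql=sql)  # one syntax tree per statement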
1584 def __init__( 1585 self, 1586 error_level: t.Optional[ErrorLevel] = None, 1587 error_message_context: int = 100, 1588 max_errors: int = 3, 1589 dialect: DialectType = None, 1590 ): 1591 from sqlglot.dialects import Dialect 1592 1593 self.error_level = error_level or ErrorLevel.IMMEDIATE 1594 self.error_message_context = error_message_context 1595 self.max_errors = max_errors 1596 self.dialect = Dialect.get_or_raise(dialect) 1597 self.reset()
1610 def parse( 1611 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1612 ) -> t.List[t.Optional[exp.Expression]]: 1613 """ 1614 Parses a list of tokens and returns a list of syntax trees, one tree 1615 per parsed SQL statement. 1616 1617 Args: 1618 raw_tokens: The list of tokens. 1619 sql: The original SQL string, used to produce helpful debug messages. 1620 1621 Returns: 1622 The list of the produced syntax trees. 1623 """ 1624 return self._parse( 1625 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1626 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
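For example, a two-statement script produces two trees (a sketch using the default dialect):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
assert len(trees) == 2  # one tree per statement

Passing sql is optional, but it lets error messages point at the offending slice of the original query.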
1628 def parse_into( 1629 self, 1630 expression_types: exp.IntoType, 1631 raw_tokens: t.List[Token], 1632 sql: t.Optional[str] = None, 1633 ) -> t.List[t.Optional[exp.Expression]]: 1634 """ 1635 Parses a list of tokens into a given Expression type. If a collection of Expression 1636 types is given instead, this method will try to parse the token list into each one 1637 of them, stopping at the first for which the parsing succeeds. 1638 1639 Args: 1640 expression_types: The expression type(s) to try and parse the token list into. 1641 raw_tokens: The list of tokens. 1642 sql: The original SQL string, used to produce helpful debug messages. 1643 1644 Returns: 1645 The target Expression. 1646 """ 1647 errors = [] 1648 for expression_type in ensure_list(expression_types): 1649 parser = self.EXPRESSION_PARSERS.get(expression_type) 1650 if not parser: 1651 raise TypeError(f"No parser registered for {expression_type}") 1652 1653 try: 1654 return self._parse(parser, raw_tokens, sql) 1655 except ParseError as e: 1656 e.errors[0]["into_expression"] = expression_type 1657 errors.append(e) 1658 1659 raise ParseError( 1660 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1661 errors=merge_errors(errors), 1662 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
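A sketch of parsing straight into a target type, assuming exp.Select is among the registered EXPRESSION_PARSERS keys; the high-level equivalent is sqlglot.parse_one(sql, into=exp.Select):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(select, exp.Select)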
1702 def check_errors(self) -> None: 1703 """Logs or raises any found errors, depending on the chosen error level setting.""" 1704 if self.error_level == ErrorLevel.WARN: 1705 for error in self.errors: 1706 logger.error(str(error)) 1707 elif self.error_level == ErrorLevel.RAISE and self.errors: 1708 raise ParseError( 1709 concat_messages(self.errors, self.max_errors), 1710 errors=merge_errors(self.errors), 1711 )
Logs or raises any found errors, depending on the chosen error level setting.
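For instance, under ErrorLevel.WARN the errors accumulated during parsing are logged through the "sqlglot" logger rather than raised (a sketch; the exact messages depend on the dialect):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize("SELECT * FORM t"))  # note the FORM typo
print(parser.errors)  # the collected ParseErrors, logged instead of raised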
1713 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1714 """ 1715 Appends an error in the list of recorded errors or raises it, depending on the chosen 1716 error level setting. 1717 """ 1718 token = token or self._curr or self._prev or Token.string("") 1719 start = token.start 1720 end = token.end + 1 1721 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1722 highlight = self.sql[start:end] 1723 end_context = self.sql[end : end + self.error_message_context] 1724 1725 error = ParseError.new( 1726 f"{message}. Line {token.line}, Col: {token.col}.\n" 1727 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1728 description=message, 1729 line=token.line, 1730 col=token.col, 1731 start_context=start_context, 1732 highlight=highlight, 1733 end_context=end_context, 1734 ) 1735 1736 if self.error_level == ErrorLevel.IMMEDIATE: 1737 raise error 1738 1739 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
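Each recorded error carries structured context alongside the formatted message; a sketch of inspecting it when the error is raised:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FORM t"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], err["highlight"])  # location plus offending text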
1741 def expression( 1742 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1743 ) -> E: 1744 """ 1745 Creates a new, validated Expression. 1746 1747 Args: 1748 exp_class: The expression class to instantiate. 1749 comments: An optional list of comments to attach to the expression. 1750 kwargs: The arguments to set for the expression along with their respective values. 1751 1752 Returns: 1753 The target expression. 1754 """ 1755 instance = exp_class(**kwargs) 1756 instance.add_comments(comments) if comments else self._add_comments(instance) 1757 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
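This is the hook dialect parsers use to build nodes so that pending comments and validation are applied uniformly. A hypothetical sketch (the _parse_clamp method and the CLAMP(...) syntax are invented for illustration):

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_clamp(self) -> exp.Expression:
        # Hypothetical: parse CLAMP(<expr>, <low>, <high>) into a generic function node
        args = self._parse_csv(self._parse_assignment)
        return self.expression(exp.Anonymous, this="CLAMP", expressions=args)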
1764 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1765 """ 1766 Validates an Expression, making sure that all its mandatory arguments are set. 1767 1768 Args: 1769 expression: The expression to validate. 1770 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1771 1772 Returns: 1773 The validated expression. 1774 """ 1775 if self.error_level != ErrorLevel.IGNORE: 1776 for error_message in expression.error_messages(args): 1777 self.raise_error(error_message) 1778 1779 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
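A sketch of the validation behaviour: a node missing a required argument is routed through raise_error unless the error level is IGNORE (the exact error wording is an assumption here):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

Parser(error_level=ErrorLevel.IGNORE).validate_expression(exp.Not())  # passes through
Parser().validate_expression(exp.Not())  # raises ParseError: required 'this' is missing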
4879 def parse_set_operation( 4880 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4881 ) -> t.Optional[exp.Expression]: 4882 start = self._index 4883 _, side_token, kind_token = self._parse_join_parts() 4884 4885 side = side_token.text if side_token else None 4886 kind = kind_token.text if kind_token else None 4887 4888 if not self._match_set(self.SET_OPERATIONS): 4889 self._retreat(start) 4890 return None 4891 4892 token_type = self._prev.token_type 4893 4894 if token_type == TokenType.UNION: 4895 operation: t.Type[exp.SetOperation] = exp.Union 4896 elif token_type == TokenType.EXCEPT: 4897 operation = exp.Except 4898 else: 4899 operation = exp.Intersect 4900 4901 comments = self._prev.comments 4902 4903 if self._match(TokenType.DISTINCT): 4904 distinct: t.Optional[bool] = True 4905 elif self._match(TokenType.ALL): 4906 distinct = False 4907 else: 4908 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4909 if distinct is None: 4910 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4911 4912 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4913 "STRICT", "CORRESPONDING" 4914 ) 4915 if self._match_text_seq("CORRESPONDING"): 4916 by_name = True 4917 if not side and not kind: 4918 kind = "INNER" 4919 4920 on_column_list = None 4921 if by_name and self._match_texts(("ON", "BY")): 4922 on_column_list = self._parse_wrapped_csv(self._parse_column) 4923 4924 expression = self._parse_select( 4925 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4926 ) 4927 4928 return self.expression( 4929 operation, 4930 comments=comments, 4931 this=this, 4932 distinct=distinct, 4933 by_name=by_name, 4934 expression=expression, 4935 side=side, 4936 kind=kind, 4937 on=on_column_list, 4938 )
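For example, parse_set_operation is what turns a trailing UNION into an exp.Union node; a quick check through the public API:

from sqlglot import exp, parse_one

tree = parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")
assert isinstance(tree, exp.Union)
print(tree.args["distinct"])  # False, since ALL was matched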