sqlglot.parser

from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
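
# Illustrative example (editor's note, not part of the original source): these
# builders normalize function calls at parse time. For instance, build_lower
# collapses LOWER(HEX(..)) into a single LowerHex node:
#
#   >>> from sqlglot import parse_one, exp
#   >>> isinstance(parse_one("SELECT LOWER(HEX(x))").selects[0], exp.LowerHex)
#   True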

def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
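
    # Usage sketch (editor's note, not part of the original source): a Parser
    # is usually obtained through a Dialect, but it can also be driven directly:
    #
    #   >>> from sqlglot.tokens import Tokenizer
    #   >>> tokens = Tokenizer().tokenize("SELECT 1")
    #   >>> Parser().parse(tokens)[0].sql()
    #   'SELECT 1'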

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
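
    # Example (editor's note, not part of the original source): FUNCTIONS is
    # keyed by the uppercased function name, so COALESCE, IFNULL and NVL all
    # resolve to build_coalesce and produce the same node:
    #
    #   >>> from sqlglot import parse_one, exp
    #   >>> isinstance(parse_one("SELECT IFNULL(a, b)").selects[0], exp.Coalesce)
    #   True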

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
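
    # Example (editor's note, not part of the original source): the operator
    # tables above (EQUALITY, COMPARISON, BITWISE, TERM, FACTOR, ...) drive a
    # precedence-climbing parse, so FACTOR operators bind tighter than TERM:
    #
    #   >>> from sqlglot import parse_one, exp
    #   >>> tree = parse_one("SELECT a + b * c").selects[0]
    #   >>> isinstance(tree, exp.Add) and isinstance(tree.expression, exp.Mul)
    #   True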

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
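
    # Example (editor's note, not part of the original source):
    # EXPRESSION_PARSERS backs parse_into, mapping a target node type to the
    # _parse_* method that produces it:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("x > 1 AND y > 2", into=exp.Condition).sql()
    #   'x > 1 AND y > 2'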

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
    }
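
    # Example (editor's note, not part of the original source): RANGE_PARSERS
    # handles postfix predicates such as BETWEEN and IN once the left operand
    # has already been parsed:
    #
    #   >>> from sqlglot import parse_one, exp
    #   >>> isinstance(parse_one("x BETWEEN 1 AND 2"), exp.Between)
    #   True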

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If the keyword is not followed by parentheses, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
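
    # Example (editor's note, not part of the original source): given the Hive
    # argument order `PARTITION BY (BUCKET(16, id))`, the literal check above
    # detects that the first argument is a number and swaps the pair, yielding
    # PartitionedByBucket(this=id, expression=16) - the Trino-style
    # `BUCKET(id, 16)` canonical form.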

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {}

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether Alter statements are allowed to contain Partition specifications
    ALTER_TABLE_PARTITIONS = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
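
    # Example (editor's note, not part of the original source): _parse (below)
    # splits the token stream on semicolons, so parse() returns one tree per
    # statement:
    #
    #   >>> import sqlglot
    #   >>> [e.sql() for e in sqlglot.parse("SELECT 1; SELECT 2")]
    #   ['SELECT 1', 'SELECT 2']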
1615
1616         Args:
1617             raw_tokens: The list of tokens.
1618             sql: The original SQL string, used to produce helpful debug messages.
1619
1620         Returns:
1621             The list of produced syntax trees.
1622         """
1623         return self._parse(
1624             parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
1625         )
1626
1627     def parse_into(
1628         self,
1629         expression_types: exp.IntoType,
1630         raw_tokens: t.List[Token],
1631         sql: t.Optional[str] = None,
1632     ) -> t.List[t.Optional[exp.Expression]]:
1633         """
1634         Parses a list of tokens into a given Expression type. If a collection of Expression
1635         types is given instead, this method will try to parse the token list into each one
1636         of them, stopping at the first for which the parsing succeeds.
1637
1638         Args:
1639             expression_types: The expression type(s) to try and parse the token list into.
1640             raw_tokens: The list of tokens.
1641             sql: The original SQL string, used to produce helpful debug messages.
1642
1643         Returns:
1644             The target Expression.
1645         """
1646         errors = []
1647         for expression_type in ensure_list(expression_types):
1648             parser = self.EXPRESSION_PARSERS.get(expression_type)
1649             if not parser:
1650                 raise TypeError(f"No parser registered for {expression_type}")
1651
1652             try:
1653                 return self._parse(parser, raw_tokens, sql)
1654             except ParseError as e:
1655                 e.errors[0]["into_expression"] = expression_type
1656                 errors.append(e)
1657
1658         raise ParseError(
1659             f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
1660             errors=merge_errors(errors),
1661         ) from errors[-1]
1662
1663     def _parse(
1664         self,
1665         parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
1666         raw_tokens: t.List[Token],
1667         sql: t.Optional[str] = None,
1668     ) -> t.List[t.Optional[exp.Expression]]:
1669         self.reset()
1670         self.sql = sql or ""
1671
1672         total = len(raw_tokens)
1673         chunks: t.List[t.List[Token]] = [[]]
1674
1675         for i, token in enumerate(raw_tokens):
1676             if token.token_type == TokenType.SEMICOLON:
1677                 if token.comments:
1678                     chunks.append([token])
1679
1680                 if i < total - 1:
1681                     chunks.append([])
1682             else:
1683                 chunks[-1].append(token)
1684
1685         expressions = []
1686
1687         for tokens in chunks:
1688             self._index = -1
1689             self._tokens = tokens
1690             self._advance()
1691
1692             expressions.append(parse_method(self))
1693
1694             if self._index < len(self._tokens):
1695                 self.raise_error("Invalid expression / Unexpected token")
1696
1697             self.check_errors()
1698
1699         return expressions
1700
1701     def check_errors(self) -> None:
1702         """Logs or raises any found errors, depending on the chosen error level setting."""
1703         if self.error_level == ErrorLevel.WARN:
1704             for error in self.errors:
1705                 logger.error(str(error))
1706         elif self.error_level == ErrorLevel.RAISE and self.errors:
1707             raise ParseError(
1708                 concat_messages(self.errors, self.max_errors),
1709                 errors=merge_errors(self.errors),
1710             )
1711
1712     def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
1713         """
1714         Appends an error to the list of recorded errors or raises it, depending on the chosen
1715         error level setting.
1716         """
1717         token = token or self._curr or self._prev or Token.string("")
1718         start = token.start
1719         end = token.end + 1
1720         start_context = self.sql[max(start - self.error_message_context, 0) : start]
1721         highlight = self.sql[start:end]
1722         end_context = self.sql[end : end + self.error_message_context]
1723
1724         error = ParseError.new(
1725             f"{message}. Line {token.line}, Col: {token.col}.\n"
1726             f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
1727             description=message,
1728             line=token.line,
1729             col=token.col,
1730             start_context=start_context,
1731             highlight=highlight,
1732             end_context=end_context,
1733         )
1734
1735         if self.error_level == ErrorLevel.IMMEDIATE:
1736             raise error
1737
1738         self.errors.append(error)
1739
1740     def expression(
1741         self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
1742     ) -> E:
1743         """
1744         Creates a new, validated Expression.
1745
1746         Args:
1747             exp_class: The expression class to instantiate.
1748             comments: An optional list of comments to attach to the expression.
1749             kwargs: The arguments to set for the expression along with their respective values.
1750
1751         Returns:
1752             The target expression.
1753         """
1754         instance = exp_class(**kwargs)
1755         instance.add_comments(comments) if comments else self._add_comments(instance)
1756         return self.validate_expression(instance)
1757
1758     def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
1759         if expression and self._prev_comments:
1760             expression.add_comments(self._prev_comments)
1761             self._prev_comments = None
1762
1763     def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
1764         """
1765         Validates an Expression, making sure that all its mandatory arguments are set.
1766
1767         Args:
1768             expression: The expression to validate.
1769             args: An optional list of items that was used to instantiate the expression, if it's a Func.
1770
1771         Returns:
1772             The validated expression.
1773         """
1774         if self.error_level != ErrorLevel.IGNORE:
1775             for error_message in expression.error_messages(args):
1776                 self.raise_error(error_message)
1777
1778         return expression
1779
1780     def _find_sql(self, start: Token, end: Token) -> str:
1781         return self.sql[start.start : end.end + 1]
1782
1783     def _is_connected(self) -> bool:
1784         return self._prev and self._curr and self._prev.end + 1 == self._curr.start
1785
1786     def _advance(self, times: int = 1) -> None:
1787         self._index += times
1788         self._curr = seq_get(self._tokens, self._index)
1789         self._next = seq_get(self._tokens, self._index + 1)
1790
1791         if self._index > 0:
1792             self._prev = self._tokens[self._index - 1]
1793             self._prev_comments = self._prev.comments
1794         else:
1795             self._prev = None
1796             self._prev_comments = None
1797
1798     def _retreat(self, index: int) -> None:
1799         if index != self._index:
1800             self._advance(index - self._index)
1801
1802     def _warn_unsupported(self) -> None:
1803         if len(self._tokens) <= 1:
1804             return
1805
1806         # We use _find_sql because self.sql may comprise multiple chunks, and we're only
1807         # interested in emitting a warning for the one being currently processed.
1808         sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]
1809
1810         logger.warning(
1811             f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1812         )
1813
1814     def _parse_command(self) -> exp.Command:
1815         self._warn_unsupported()
1816         return self.expression(
1817             exp.Command,
1818             comments=self._prev_comments,
1819             this=self._prev.text.upper(),
1820             expression=self._parse_string(),
1821         )
1822
1823     def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
1824         """
1825         Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1826         This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1827         solve this by setting & resetting the parser state accordingly.
1828         """
1829         index = self._index
1830         error_level = self.error_level
1831
1832         self.error_level = ErrorLevel.IMMEDIATE
1833         try:
1834             this = parse_method()
1835         except ParseError:
1836             this = None
1837         finally:
1838             if not this or retreat:
1839                 self._retreat(index)
1840             self.error_level = error_level
1841
1842         return this
1843
1844     def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1845         start = self._prev
1846         exists = self._parse_exists() if allow_exists else None
1847
1848         self._match(TokenType.ON)
1849
1850         materialized = self._match_text_seq("MATERIALIZED")
1851         kind = self._match_set(self.CREATABLES) and self._prev
1852         if not kind:
1853             return self._parse_as_command(start)
1854
1855         if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1856             this = self._parse_user_defined_function(kind=kind.token_type)
1857         elif kind.token_type == TokenType.TABLE:
1858             this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1859         elif kind.token_type == TokenType.COLUMN:
1860             this = self._parse_column()
1861         else:
1862             this = self._parse_id_var()
1863
1864         self._match(TokenType.IS)
1865
1866         return self.expression(
1867             exp.Comment,
1868             this=this,
1869             kind=kind.text,
1870             expression=self._parse_string(),
1871             exists=exists,
1872             materialized=materialized,
1873         )
1874
1875     def _parse_to_table(
1876         self,
1877     ) -> exp.ToTableProperty:
1878         table = self._parse_table_parts(schema=True)
1879         return self.expression(exp.ToTableProperty, this=table)
1880
1881     # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1882     def _parse_ttl(self) -> exp.Expression:
1883         def _parse_ttl_action() -> t.Optional[exp.Expression]:
1884             this = self._parse_bitwise()
1885
1886             if self._match_text_seq("DELETE"):
1887                 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1888             if self._match_text_seq("RECOMPRESS"):
1889                 return self.expression(
1890                     exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1891                 )
1892             if self._match_text_seq("TO", "DISK"):
1893                 return self.expression(
1894                     exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1895                 )
1896             if self._match_text_seq("TO", "VOLUME"):
1897                 return self.expression(
1898                     exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1899                 )
1900
1901             return this
1902
1903         expressions = self._parse_csv(_parse_ttl_action)
1904         where = self._parse_where()
1905         group = self._parse_group()
1906
1907         aggregates = None
1908         if group and self._match(TokenType.SET):
1909             aggregates = self._parse_csv(self._parse_set_item)
1910
1911         return self.expression(
1912             exp.MergeTreeTTL,
1913             expressions=expressions,
1914             where=where,
1915             group=group,
1916             aggregates=aggregates,
1917         )
1918
1919     def _parse_statement(self) -> t.Optional[exp.Expression]:
1920         if self._curr is None:
1921             return None
1922
1923         if self._match_set(self.STATEMENT_PARSERS):
1924             comments = self._prev_comments
1925             stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1926             stmt.add_comments(comments, prepend=True)
1927             return stmt
1928
1929         if self._match_set(self.dialect.tokenizer_class.COMMANDS):
1930             return self._parse_command()
1931
1932         expression = self._parse_expression()
1933         expression = self._parse_set_operations(expression) if expression else self._parse_select()
1934         return self._parse_query_modifiers(expression)
1935
1936     def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
1937         start = self._prev
1938         temporary = self._match(TokenType.TEMPORARY)
1939         materialized = self._match_text_seq("MATERIALIZED")
1940
1941         kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
1942         if not kind:
1943             return self._parse_as_command(start)
1944
1945         concurrently = self._match_text_seq("CONCURRENTLY")
1946         if_exists = exists or self._parse_exists()
1947
1948         if kind == "COLUMN":
1949             this = self._parse_column()
1950         else:
1951             this = self._parse_table_parts(
1952                 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
1953             )
1954
1955         cluster = self._parse_on_property() if self._match(TokenType.ON) else None
1956
1957         if self._match(TokenType.L_PAREN, advance=False):
1958             expressions = self._parse_wrapped_csv(self._parse_types)
1959         else:
1960             expressions = None
1961
1962         return self.expression(
1963             exp.Drop,
1964             exists=if_exists,
1965             this=this,
1966             expressions=expressions,
1967             kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
1968             temporary=temporary,
1969             materialized=materialized,
1970             cascade=self._match_text_seq("CASCADE"),
1971             constraints=self._match_text_seq("CONSTRAINTS"),
1972             purge=self._match_text_seq("PURGE"),
1973             cluster=cluster,
1974             concurrently=concurrently,
1975         )
1976
1977     def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1978         return (
1979             self._match_text_seq("IF")
1980             and (not not_ or self._match(TokenType.NOT))
1981             and self._match(TokenType.EXISTS)
1982         )
1983
1984     def _parse_create(self) -> exp.Create | exp.Command:
1985         # Note: this can't be None because we've matched a statement parser
1986         start = self._prev
1987
1988         replace = (
1989             start.token_type == TokenType.REPLACE
1990             or self._match_pair(TokenType.OR, TokenType.REPLACE)
1991             or self._match_pair(TokenType.OR, TokenType.ALTER)
1992         )
1993         refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)
1994
1995         unique = self._match(TokenType.UNIQUE)
1996
1997         if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
1998             clustered = True
1999         elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
2000             "COLUMNSTORE"
2001         ):
2002             clustered = False
2003         else:
2004             clustered = None
2005
2006         if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
2007             self._advance()
2008
2009         properties = None
2010         create_token = self._match_set(self.CREATABLES) and self._prev
2011
2012         if not create_token:
2013             # exp.Properties.Location.POST_CREATE
2014             properties = self._parse_properties()
2015             create_token = self._match_set(self.CREATABLES) and self._prev
2016
2017         if not properties or not create_token:
2018             return self._parse_as_command(start)
2019
2020         concurrently = self._match_text_seq("CONCURRENTLY")
2021         exists = self._parse_exists(not_=True)
2022         this = None
2023         expression: t.Optional[exp.Expression] = None
2024         indexes = None
2025         no_schema_binding = None
2026         begin = None
2027         end = None
2028         clone = None
2029
2030         def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
2031             nonlocal properties
2032             if properties and temp_props:
2033                 properties.expressions.extend(temp_props.expressions)
2034             elif temp_props:
2035                 properties = temp_props
2036
2037         if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
2038             this = self._parse_user_defined_function(kind=create_token.token_type)
2039
2040             # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
2041             extend_props(self._parse_properties())
2042
2043             expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
2044             extend_props(self._parse_properties())
2045
2046             if not expression:
2047                 if self._match(TokenType.COMMAND):
2048                     expression = self._parse_as_command(self._prev)
2049                 else:
2050                     begin = self._match(TokenType.BEGIN)
2051                     return_ = self._match_text_seq("RETURN")
2052
2053                     if self._match(TokenType.STRING, advance=False):
2054                         # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
2055                         # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
2056                         expression = self._parse_string()
2057                         extend_props(self._parse_properties())
2058                     else:
2059                         expression = self._parse_user_defined_function_expression()
2060
2061                     end = self._match_text_seq("END")
2062
2063                     if return_:
2064                         expression = self.expression(exp.Return, this=expression)
2065         elif create_token.token_type == TokenType.INDEX:
2066             # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
2067             if not self._match(TokenType.ON):
2068                 index = self._parse_id_var()
2069                 anonymous = False
2070             else:
2071                 index = None
2072                 anonymous = True
2073
2074             this = self._parse_index(index=index, anonymous=anonymous)
2075         elif create_token.token_type in self.DB_CREATABLES:
2076             table_parts = self._parse_table_parts(
2077                 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
2078             )
2079
2080             # exp.Properties.Location.POST_NAME
2081             self._match(TokenType.COMMA)
2082             extend_props(self._parse_properties(before=True))
2083
2084             this = self._parse_schema(this=table_parts)
2085
2086             # exp.Properties.Location.POST_SCHEMA and POST_WITH
2087             extend_props(self._parse_properties())
2088
2089             has_alias = self._match(TokenType.ALIAS)
2090             if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
2091                 # exp.Properties.Location.POST_ALIAS
2092                 extend_props(self._parse_properties())
2093
2094             if create_token.token_type == TokenType.SEQUENCE:
2095                 expression = self._parse_types()
2096                 props = self._parse_properties()
2097                 if props:
2098                     sequence_props = exp.SequenceProperties()
2099                     options = []
2100                     for prop in props:
2101                         if isinstance(prop, exp.SequenceProperties):
2102                             for arg, value in prop.args.items():
2103                                 if arg == "options":
2104                                     options.extend(value)
2105                                 else:
2106                                     sequence_props.set(arg, value)
2107                             prop.pop()
2108
2109                     if options:
2110                         sequence_props.set("options", options)
2111
2112                     props.append("expressions", sequence_props)
2113                     extend_props(props)
2114             else:
2115                 expression = self._parse_ddl_select()
2116
2117             # Some dialects also support using a table as an alias instead of a SELECT.
2118             # Here we fall back to this as an alternative.
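            # (Editor's illustrative note, not in the original source: the fallback
            # below covers DDL such as `CREATE TABLE t1 AS t2`, where a table name
            # rather than a SELECT follows the alias token. Assuming only the public
            # sqlglot API:
            #
            #   >>> import sqlglot
            #   >>> create = sqlglot.parse_one("CREATE TABLE t1 AS t2")
            #   >>> isinstance(create.args.get("expression"), sqlglot.exp.Table)
            #
            # The exact tree shape may vary across dialects and sqlglot versions.)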
2119 if not expression and has_alias: 2120 expression = self._try_parse(self._parse_table_parts) 2121 2122 if create_token.token_type == TokenType.TABLE: 2123 # exp.Properties.Location.POST_EXPRESSION 2124 extend_props(self._parse_properties()) 2125 2126 indexes = [] 2127 while True: 2128 index = self._parse_index() 2129 2130 # exp.Properties.Location.POST_INDEX 2131 extend_props(self._parse_properties()) 2132 if not index: 2133 break 2134 else: 2135 self._match(TokenType.COMMA) 2136 indexes.append(index) 2137 elif create_token.token_type == TokenType.VIEW: 2138 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2139 no_schema_binding = True 2140 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2141 extend_props(self._parse_properties()) 2142 2143 shallow = self._match_text_seq("SHALLOW") 2144 2145 if self._match_texts(self.CLONE_KEYWORDS): 2146 copy = self._prev.text.lower() == "copy" 2147 clone = self.expression( 2148 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2149 ) 2150 2151 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2152 return self._parse_as_command(start) 2153 2154 create_kind_text = create_token.text.upper() 2155 return self.expression( 2156 exp.Create, 2157 this=this, 2158 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2159 replace=replace, 2160 refresh=refresh, 2161 unique=unique, 2162 expression=expression, 2163 exists=exists, 2164 properties=properties, 2165 indexes=indexes, 2166 no_schema_binding=no_schema_binding, 2167 begin=begin, 2168 end=end, 2169 clone=clone, 2170 concurrently=concurrently, 2171 clustered=clustered, 2172 ) 2173 2174 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2175 seq = exp.SequenceProperties() 2176 2177 options = [] 2178 index = self._index 2179 2180 while self._curr: 2181 self._match(TokenType.COMMA) 2182 if self._match_text_seq("INCREMENT"): 2183 self._match_text_seq("BY") 2184 self._match_text_seq("=") 2185 seq.set("increment", self._parse_term()) 2186 elif self._match_text_seq("MINVALUE"): 2187 seq.set("minvalue", self._parse_term()) 2188 elif self._match_text_seq("MAXVALUE"): 2189 seq.set("maxvalue", self._parse_term()) 2190 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2191 self._match_text_seq("=") 2192 seq.set("start", self._parse_term()) 2193 elif self._match_text_seq("CACHE"): 2194 # T-SQL allows empty CACHE which is initialized dynamically 2195 seq.set("cache", self._parse_number() or True) 2196 elif self._match_text_seq("OWNED", "BY"): 2197 # "OWNED BY NONE" is the default 2198 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2199 else: 2200 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2201 if opt: 2202 options.append(opt) 2203 else: 2204 break 2205 2206 seq.set("options", options if options else None) 2207 return None if self._index == index else seq 2208 2209 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2210 # only used for teradata currently 2211 self._match(TokenType.COMMA) 2212 2213 kwargs = { 2214 "no": self._match_text_seq("NO"), 2215 "dual": self._match_text_seq("DUAL"), 2216 "before": self._match_text_seq("BEFORE"), 2217 "default": self._match_text_seq("DEFAULT"), 2218 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2219 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2220 "after": self._match_text_seq("AFTER"), 2221 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2222 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2223 } 2224 2225 if self._match_texts(self.PROPERTY_PARSERS): 2226 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2227 try: 2228 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2229 except TypeError: 2230 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2231 2232 return None 2233 2234 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2235 return self._parse_wrapped_csv(self._parse_property) 2236 2237 def _parse_property(self) -> t.Optional[exp.Expression]: 2238 if self._match_texts(self.PROPERTY_PARSERS): 2239 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2240 2241 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2242 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2243 2244 if self._match_text_seq("COMPOUND", "SORTKEY"): 2245 return self._parse_sortkey(compound=True) 2246 2247 if self._match_text_seq("SQL", "SECURITY"): 2248 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2249 2250 index = self._index 2251 2252 seq_props = self._parse_sequence_properties() 2253 if seq_props: 2254 return seq_props 2255 2256 self._retreat(index) 2257 key = self._parse_column() 2258 2259 if not self._match(TokenType.EQ): 2260 self._retreat(index) 2261 return None 2262 2263 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2264 if isinstance(key, exp.Column): 2265 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2266 2267 value = self._parse_bitwise() or self._parse_var(any_token=True) 2268 2269 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2270 if isinstance(value, exp.Column): 2271 value = exp.var(value.name) 2272 2273 return self.expression(exp.Property, this=key, value=value) 2274 2275 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2276 if self._match_text_seq("BY"): 2277 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2278 2279 self._match(TokenType.ALIAS) 2280 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2281 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2282 2283 return self.expression( 2284 exp.FileFormatProperty, 2285 this=( 2286 self.expression( 2287 exp.InputOutputFormat, 2288 input_format=input_format, 2289 output_format=output_format, 2290 ) 2291 if input_format or output_format 2292 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2293 ), 2294 hive_format=True, 2295 ) 2296 2297 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2298 field = self._parse_field() 2299 if isinstance(field, exp.Identifier) and not field.quoted: 2300 field = exp.var(field) 2301 2302 return field 2303 2304 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2305 self._match(TokenType.EQ) 2306 self._match(TokenType.ALIAS) 2307 2308 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2309 2310 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2311 properties = [] 2312 while True: 2313 if before: 2314 prop = self._parse_property_before() 2315 else: 2316 prop = self._parse_property() 2317 if not prop: 2318 break 2319 for p in ensure_list(prop): 2320 properties.append(p) 
2321 2322 if properties: 2323 return self.expression(exp.Properties, expressions=properties) 2324 2325 return None 2326 2327 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2328 return self.expression( 2329 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2330 ) 2331 2332 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2333 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2334 security_specifier = self._prev.text.upper() 2335 return self.expression(exp.SecurityProperty, this=security_specifier) 2336 return None 2337 2338 def _parse_settings_property(self) -> exp.SettingsProperty: 2339 return self.expression( 2340 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2341 ) 2342 2343 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2344 if self._index >= 2: 2345 pre_volatile_token = self._tokens[self._index - 2] 2346 else: 2347 pre_volatile_token = None 2348 2349 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2350 return exp.VolatileProperty() 2351 2352 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2353 2354 def _parse_retention_period(self) -> exp.Var: 2355 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2356 number = self._parse_number() 2357 number_str = f"{number} " if number else "" 2358 unit = self._parse_var(any_token=True) 2359 return exp.var(f"{number_str}{unit}") 2360 2361 def _parse_system_versioning_property( 2362 self, with_: bool = False 2363 ) -> exp.WithSystemVersioningProperty: 2364 self._match(TokenType.EQ) 2365 prop = self.expression( 2366 exp.WithSystemVersioningProperty, 2367 **{ # type: ignore 2368 "on": True, 2369 "with": with_, 2370 }, 2371 ) 2372 2373 if self._match_text_seq("OFF"): 2374 prop.set("on", False) 2375 return prop 2376 2377 self._match(TokenType.ON) 2378 if self._match(TokenType.L_PAREN): 2379 while self._curr and not self._match(TokenType.R_PAREN): 2380 if self._match_text_seq("HISTORY_TABLE", "="): 2381 prop.set("this", self._parse_table_parts()) 2382 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2383 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2384 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2385 prop.set("retention_period", self._parse_retention_period()) 2386 2387 self._match(TokenType.COMMA) 2388 2389 return prop 2390 2391 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2392 self._match(TokenType.EQ) 2393 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2394 prop = self.expression(exp.DataDeletionProperty, on=on) 2395 2396 if self._match(TokenType.L_PAREN): 2397 while self._curr and not self._match(TokenType.R_PAREN): 2398 if self._match_text_seq("FILTER_COLUMN", "="): 2399 prop.set("filter_column", self._parse_column()) 2400 elif self._match_text_seq("RETENTION_PERIOD", "="): 2401 prop.set("retention_period", self._parse_retention_period()) 2402 2403 self._match(TokenType.COMMA) 2404 2405 return prop 2406 2407 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2408 kind = "HASH" 2409 expressions: t.Optional[t.List[exp.Expression]] = None 2410 if self._match_text_seq("BY", "HASH"): 2411 expressions = self._parse_wrapped_csv(self._parse_id_var) 2412 elif self._match_text_seq("BY", "RANDOM"): 2413 kind = "RANDOM" 2414 2415 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2416 
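        # (Editor's illustrative note, not in the original source: this accepts
        # Doris/StarRocks-style distribution clauses, sketched from the code below:
        #
        #   DISTRIBUTED BY HASH(k1, k2) BUCKETS 10   -> kind="HASH", buckets=10
        #   DISTRIBUTED BY RANDOM                    -> kind="RANDOM", buckets=None
        #
        # where a missing BUCKETS count, or BUCKETS AUTO, leaves `buckets` unset.)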
buckets: t.Optional[exp.Expression] = None 2417 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2418 buckets = self._parse_number() 2419 2420 return self.expression( 2421 exp.DistributedByProperty, 2422 expressions=expressions, 2423 kind=kind, 2424 buckets=buckets, 2425 order=self._parse_order(), 2426 ) 2427 2428 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2429 self._match_text_seq("KEY") 2430 expressions = self._parse_wrapped_id_vars() 2431 return self.expression(expr_type, expressions=expressions) 2432 2433 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2434 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2435 prop = self._parse_system_versioning_property(with_=True) 2436 self._match_r_paren() 2437 return prop 2438 2439 if self._match(TokenType.L_PAREN, advance=False): 2440 return self._parse_wrapped_properties() 2441 2442 if self._match_text_seq("JOURNAL"): 2443 return self._parse_withjournaltable() 2444 2445 if self._match_texts(self.VIEW_ATTRIBUTES): 2446 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2447 2448 if self._match_text_seq("DATA"): 2449 return self._parse_withdata(no=False) 2450 elif self._match_text_seq("NO", "DATA"): 2451 return self._parse_withdata(no=True) 2452 2453 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2454 return self._parse_serde_properties(with_=True) 2455 2456 if self._match(TokenType.SCHEMA): 2457 return self.expression( 2458 exp.WithSchemaBindingProperty, 2459 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2460 ) 2461 2462 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2463 return self.expression( 2464 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2465 ) 2466 2467 if not self._next: 2468 return None 2469 2470 return self._parse_withisolatedloading() 2471 2472 def _parse_procedure_option(self) -> exp.Expression | None: 2473 if self._match_text_seq("EXECUTE", "AS"): 2474 return self.expression( 2475 exp.ExecuteAsProperty, 2476 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2477 or self._parse_string(), 2478 ) 2479 2480 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2481 2482 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2483 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2484 self._match(TokenType.EQ) 2485 2486 user = self._parse_id_var() 2487 self._match(TokenType.PARAMETER) 2488 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2489 2490 if not user or not host: 2491 return None 2492 2493 return exp.DefinerProperty(this=f"{user}@{host}") 2494 2495 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2496 self._match(TokenType.TABLE) 2497 self._match(TokenType.EQ) 2498 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2499 2500 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2501 return self.expression(exp.LogProperty, no=no) 2502 2503 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2504 return self.expression(exp.JournalProperty, **kwargs) 2505 2506 def _parse_checksum(self) -> exp.ChecksumProperty: 2507 self._match(TokenType.EQ) 2508 2509 on = None 2510 if self._match(TokenType.ON): 2511 on = True 2512 elif self._match_text_seq("OFF"): 2513 on = False 2514 2515 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2516 2517 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2518 return self.expression( 2519 exp.Cluster, 2520 expressions=( 2521 self._parse_wrapped_csv(self._parse_ordered) 2522 if wrapped 2523 else self._parse_csv(self._parse_ordered) 2524 ), 2525 ) 2526 2527 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2528 self._match_text_seq("BY") 2529 2530 self._match_l_paren() 2531 expressions = self._parse_csv(self._parse_column) 2532 self._match_r_paren() 2533 2534 if self._match_text_seq("SORTED", "BY"): 2535 self._match_l_paren() 2536 sorted_by = self._parse_csv(self._parse_ordered) 2537 self._match_r_paren() 2538 else: 2539 sorted_by = None 2540 2541 self._match(TokenType.INTO) 2542 buckets = self._parse_number() 2543 self._match_text_seq("BUCKETS") 2544 2545 return self.expression( 2546 exp.ClusteredByProperty, 2547 expressions=expressions, 2548 sorted_by=sorted_by, 2549 buckets=buckets, 2550 ) 2551 2552 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2553 if not self._match_text_seq("GRANTS"): 2554 self._retreat(self._index - 1) 2555 return None 2556 2557 return self.expression(exp.CopyGrantsProperty) 2558 2559 def _parse_freespace(self) -> exp.FreespaceProperty: 2560 self._match(TokenType.EQ) 2561 return self.expression( 2562 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2563 ) 2564 2565 def _parse_mergeblockratio( 2566 self, no: bool = False, default: bool = False 2567 ) -> exp.MergeBlockRatioProperty: 2568 if self._match(TokenType.EQ): 2569 return self.expression( 2570 exp.MergeBlockRatioProperty, 2571 this=self._parse_number(), 2572 percent=self._match(TokenType.PERCENT), 2573 ) 2574 2575 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2576 2577 def _parse_datablocksize( 2578 self, 2579 default: t.Optional[bool] = None, 2580 minimum: t.Optional[bool] = None, 2581 maximum: t.Optional[bool] = None, 2582 ) -> exp.DataBlocksizeProperty: 2583 self._match(TokenType.EQ) 2584 size = self._parse_number() 2585 2586 units = None 2587 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2588 units = self._prev.text 2589 2590 return self.expression( 2591 exp.DataBlocksizeProperty, 2592 size=size, 2593 units=units, 2594 default=default, 2595 minimum=minimum, 2596 maximum=maximum, 2597 ) 2598 2599 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2600 self._match(TokenType.EQ) 2601 always = self._match_text_seq("ALWAYS") 2602 manual = self._match_text_seq("MANUAL") 2603 never = self._match_text_seq("NEVER") 2604 default = self._match_text_seq("DEFAULT") 2605 2606 autotemp = None 2607 if self._match_text_seq("AUTOTEMP"): 2608 autotemp = self._parse_schema() 2609 2610 return self.expression( 2611 exp.BlockCompressionProperty, 2612 always=always, 2613 manual=manual, 2614 never=never, 2615 default=default, 2616 autotemp=autotemp, 2617 ) 2618 2619 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2620 index = self._index 2621 no = self._match_text_seq("NO") 2622 concurrent = self._match_text_seq("CONCURRENT") 2623 2624 if not self._match_text_seq("ISOLATED", "LOADING"): 2625 self._retreat(index) 2626 return None 2627 2628 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2629 return self.expression( 2630 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2631 ) 2632 2633 def _parse_locking(self) -> exp.LockingProperty: 2634 if self._match(TokenType.TABLE): 2635 kind = "TABLE" 2636 elif 
self._match(TokenType.VIEW): 2637 kind = "VIEW" 2638 elif self._match(TokenType.ROW): 2639 kind = "ROW" 2640 elif self._match_text_seq("DATABASE"): 2641 kind = "DATABASE" 2642 else: 2643 kind = None 2644 2645 if kind in ("DATABASE", "TABLE", "VIEW"): 2646 this = self._parse_table_parts() 2647 else: 2648 this = None 2649 2650 if self._match(TokenType.FOR): 2651 for_or_in = "FOR" 2652 elif self._match(TokenType.IN): 2653 for_or_in = "IN" 2654 else: 2655 for_or_in = None 2656 2657 if self._match_text_seq("ACCESS"): 2658 lock_type = "ACCESS" 2659 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2660 lock_type = "EXCLUSIVE" 2661 elif self._match_text_seq("SHARE"): 2662 lock_type = "SHARE" 2663 elif self._match_text_seq("READ"): 2664 lock_type = "READ" 2665 elif self._match_text_seq("WRITE"): 2666 lock_type = "WRITE" 2667 elif self._match_text_seq("CHECKSUM"): 2668 lock_type = "CHECKSUM" 2669 else: 2670 lock_type = None 2671 2672 override = self._match_text_seq("OVERRIDE") 2673 2674 return self.expression( 2675 exp.LockingProperty, 2676 this=this, 2677 kind=kind, 2678 for_or_in=for_or_in, 2679 lock_type=lock_type, 2680 override=override, 2681 ) 2682 2683 def _parse_partition_by(self) -> t.List[exp.Expression]: 2684 if self._match(TokenType.PARTITION_BY): 2685 return self._parse_csv(self._parse_assignment) 2686 return [] 2687 2688 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2689 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2690 if self._match_text_seq("MINVALUE"): 2691 return exp.var("MINVALUE") 2692 if self._match_text_seq("MAXVALUE"): 2693 return exp.var("MAXVALUE") 2694 return self._parse_bitwise() 2695 2696 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2697 expression = None 2698 from_expressions = None 2699 to_expressions = None 2700 2701 if self._match(TokenType.IN): 2702 this = self._parse_wrapped_csv(self._parse_bitwise) 2703 elif self._match(TokenType.FROM): 2704 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2705 self._match_text_seq("TO") 2706 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2707 elif self._match_text_seq("WITH", "(", "MODULUS"): 2708 this = self._parse_number() 2709 self._match_text_seq(",", "REMAINDER") 2710 expression = self._parse_number() 2711 self._match_r_paren() 2712 else: 2713 self.raise_error("Failed to parse partition bound spec.") 2714 2715 return self.expression( 2716 exp.PartitionBoundSpec, 2717 this=this, 2718 expression=expression, 2719 from_expressions=from_expressions, 2720 to_expressions=to_expressions, 2721 ) 2722 2723 # https://www.postgresql.org/docs/current/sql-createtable.html 2724 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2725 if not self._match_text_seq("OF"): 2726 self._retreat(self._index - 1) 2727 return None 2728 2729 this = self._parse_table(schema=True) 2730 2731 if self._match(TokenType.DEFAULT): 2732 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2733 elif self._match_text_seq("FOR", "VALUES"): 2734 expression = self._parse_partition_bound_spec() 2735 else: 2736 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2737 2738 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2739 2740 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2741 self._match(TokenType.EQ) 2742 return self.expression( 2743 exp.PartitionedByProperty, 2744 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2745 ) 2746 2747 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2748 if self._match_text_seq("AND", "STATISTICS"): 2749 statistics = True 2750 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2751 statistics = False 2752 else: 2753 statistics = None 2754 2755 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2756 2757 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2758 if self._match_text_seq("SQL"): 2759 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2760 return None 2761 2762 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2763 if self._match_text_seq("SQL", "DATA"): 2764 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2765 return None 2766 2767 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2768 if self._match_text_seq("PRIMARY", "INDEX"): 2769 return exp.NoPrimaryIndexProperty() 2770 if self._match_text_seq("SQL"): 2771 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2772 return None 2773 2774 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2775 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2776 return exp.OnCommitProperty() 2777 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2778 return exp.OnCommitProperty(delete=True) 2779 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2780 2781 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2782 if self._match_text_seq("SQL", "DATA"): 2783 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2784 return None 2785 2786 def _parse_distkey(self) -> exp.DistKeyProperty: 2787 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2788 2789 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2790 table = self._parse_table(schema=True) 2791 2792 options = [] 2793 while self._match_texts(("INCLUDING", "EXCLUDING")): 2794 this = self._prev.text.upper() 2795 2796 id_var = self._parse_id_var() 2797 if not id_var: 2798 return None 2799 2800 options.append( 2801 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2802 ) 2803 2804 return self.expression(exp.LikeProperty, this=table, expressions=options) 2805 2806 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2807 return self.expression( 2808 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2809 ) 2810 2811 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2812 self._match(TokenType.EQ) 2813 return self.expression( 2814 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2815 ) 2816 2817 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2818 self._match_text_seq("WITH", "CONNECTION") 2819 return self.expression( 2820 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2821 ) 2822 2823 def _parse_returns(self) -> exp.ReturnsProperty: 2824 value: t.Optional[exp.Expression] 2825 null = None 2826 is_table = self._match(TokenType.TABLE) 2827 2828 if is_table: 2829 if self._match(TokenType.LT): 2830 value = self.expression( 2831 exp.Schema, 2832 this="TABLE", 2833 expressions=self._parse_csv(self._parse_struct_types), 2834 ) 2835 if not self._match(TokenType.GT): 2836 self.raise_error("Expecting >") 2837 else: 2838 value = self._parse_schema(exp.var("TABLE")) 2839 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
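            # (Editor's note, not in the original source: this branch matches the
            # standard null-call clause, e.g. `CREATE FUNCTION f ... RETURNS NULL
            # ON NULL INPUT`. The RETURNS keyword was already consumed by the
            # property parser, so only the null-propagation flag is recorded and
            # no return type is parsed.)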
2840 null = True 2841 value = None 2842 else: 2843 value = self._parse_types() 2844 2845 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2846 2847 def _parse_describe(self) -> exp.Describe: 2848 kind = self._match_set(self.CREATABLES) and self._prev.text 2849 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2850 if self._match(TokenType.DOT): 2851 style = None 2852 self._retreat(self._index - 2) 2853 2854 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2855 2856 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2857 this = self._parse_statement() 2858 else: 2859 this = self._parse_table(schema=True) 2860 2861 properties = self._parse_properties() 2862 expressions = properties.expressions if properties else None 2863 partition = self._parse_partition() 2864 return self.expression( 2865 exp.Describe, 2866 this=this, 2867 style=style, 2868 kind=kind, 2869 expressions=expressions, 2870 partition=partition, 2871 format=format, 2872 ) 2873 2874 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2875 kind = self._prev.text.upper() 2876 expressions = [] 2877 2878 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2879 if self._match(TokenType.WHEN): 2880 expression = self._parse_disjunction() 2881 self._match(TokenType.THEN) 2882 else: 2883 expression = None 2884 2885 else_ = self._match(TokenType.ELSE) 2886 2887 if not self._match(TokenType.INTO): 2888 return None 2889 2890 return self.expression( 2891 exp.ConditionalInsert, 2892 this=self.expression( 2893 exp.Insert, 2894 this=self._parse_table(schema=True), 2895 expression=self._parse_derived_table_values(), 2896 ), 2897 expression=expression, 2898 else_=else_, 2899 ) 2900 2901 expression = parse_conditional_insert() 2902 while expression is not None: 2903 expressions.append(expression) 2904 expression = parse_conditional_insert() 2905 2906 return self.expression( 2907 exp.MultitableInserts, 2908 kind=kind, 2909 comments=comments, 2910 expressions=expressions, 2911 source=self._parse_table(), 2912 ) 2913 2914 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2915 comments = [] 2916 hint = self._parse_hint() 2917 overwrite = self._match(TokenType.OVERWRITE) 2918 ignore = self._match(TokenType.IGNORE) 2919 local = self._match_text_seq("LOCAL") 2920 alternative = None 2921 is_function = None 2922 2923 if self._match_text_seq("DIRECTORY"): 2924 this: t.Optional[exp.Expression] = self.expression( 2925 exp.Directory, 2926 this=self._parse_var_or_string(), 2927 local=local, 2928 row_format=self._parse_row_format(match_row=True), 2929 ) 2930 else: 2931 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2932 comments += ensure_list(self._prev_comments) 2933 return self._parse_multitable_inserts(comments) 2934 2935 if self._match(TokenType.OR): 2936 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2937 2938 self._match(TokenType.INTO) 2939 comments += ensure_list(self._prev_comments) 2940 self._match(TokenType.TABLE) 2941 is_function = self._match(TokenType.FUNCTION) 2942 2943 this = ( 2944 self._parse_table(schema=True, parse_partition=True) 2945 if not is_function 2946 else self._parse_function() 2947 ) 2948 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2949 this.set("alias", self._parse_table_alias()) 2950 2951 returning = self._parse_returning() 2952 2953 return self.expression( 2954 
exp.Insert, 2955 comments=comments, 2956 hint=hint, 2957 is_function=is_function, 2958 this=this, 2959 stored=self._match_text_seq("STORED") and self._parse_stored(), 2960 by_name=self._match_text_seq("BY", "NAME"), 2961 exists=self._parse_exists(), 2962 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2963 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2964 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2965 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2966 conflict=self._parse_on_conflict(), 2967 returning=returning or self._parse_returning(), 2968 overwrite=overwrite, 2969 alternative=alternative, 2970 ignore=ignore, 2971 source=self._match(TokenType.TABLE) and self._parse_table(), 2972 ) 2973 2974 def _parse_kill(self) -> exp.Kill: 2975 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2976 2977 return self.expression( 2978 exp.Kill, 2979 this=self._parse_primary(), 2980 kind=kind, 2981 ) 2982 2983 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2984 conflict = self._match_text_seq("ON", "CONFLICT") 2985 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2986 2987 if not conflict and not duplicate: 2988 return None 2989 2990 conflict_keys = None 2991 constraint = None 2992 2993 if conflict: 2994 if self._match_text_seq("ON", "CONSTRAINT"): 2995 constraint = self._parse_id_var() 2996 elif self._match(TokenType.L_PAREN): 2997 conflict_keys = self._parse_csv(self._parse_id_var) 2998 self._match_r_paren() 2999 3000 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3001 if self._prev.token_type == TokenType.UPDATE: 3002 self._match(TokenType.SET) 3003 expressions = self._parse_csv(self._parse_equality) 3004 else: 3005 expressions = None 3006 3007 return self.expression( 3008 exp.OnConflict, 3009 duplicate=duplicate, 3010 expressions=expressions, 3011 action=action, 3012 conflict_keys=conflict_keys, 3013 constraint=constraint, 3014 where=self._parse_where(), 3015 ) 3016 3017 def _parse_returning(self) -> t.Optional[exp.Returning]: 3018 if not self._match(TokenType.RETURNING): 3019 return None 3020 return self.expression( 3021 exp.Returning, 3022 expressions=self._parse_csv(self._parse_expression), 3023 into=self._match(TokenType.INTO) and self._parse_table_part(), 3024 ) 3025 3026 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3027 if not self._match(TokenType.FORMAT): 3028 return None 3029 return self._parse_row_format() 3030 3031 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3032 index = self._index 3033 with_ = with_ or self._match_text_seq("WITH") 3034 3035 if not self._match(TokenType.SERDE_PROPERTIES): 3036 self._retreat(index) 3037 return None 3038 return self.expression( 3039 exp.SerdeProperties, 3040 **{ # type: ignore 3041 "expressions": self._parse_wrapped_properties(), 3042 "with": with_, 3043 }, 3044 ) 3045 3046 def _parse_row_format( 3047 self, match_row: bool = False 3048 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3049 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3050 return None 3051 3052 if self._match_text_seq("SERDE"): 3053 this = self._parse_string() 3054 3055 serde_properties = self._parse_serde_properties() 3056 3057 return self.expression( 3058 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3059 ) 3060 3061 self._match_text_seq("DELIMITED") 3062 3063 kwargs = {} 3064 3065 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3066 kwargs["fields"] = self._parse_string() 3067 if self._match_text_seq("ESCAPED", "BY"): 3068 kwargs["escaped"] = self._parse_string() 3069 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3070 kwargs["collection_items"] = self._parse_string() 3071 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3072 kwargs["map_keys"] = self._parse_string() 3073 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3074 kwargs["lines"] = self._parse_string() 3075 if self._match_text_seq("NULL", "DEFINED", "AS"): 3076 kwargs["null"] = self._parse_string() 3077 3078 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3079 3080 def _parse_load(self) -> exp.LoadData | exp.Command: 3081 if self._match_text_seq("DATA"): 3082 local = self._match_text_seq("LOCAL") 3083 self._match_text_seq("INPATH") 3084 inpath = self._parse_string() 3085 overwrite = self._match(TokenType.OVERWRITE) 3086 self._match_pair(TokenType.INTO, TokenType.TABLE) 3087 3088 return self.expression( 3089 exp.LoadData, 3090 this=self._parse_table(schema=True), 3091 local=local, 3092 overwrite=overwrite, 3093 inpath=inpath, 3094 partition=self._parse_partition(), 3095 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3096 serde=self._match_text_seq("SERDE") and self._parse_string(), 3097 ) 3098 return self._parse_as_command(self._prev) 3099 3100 def _parse_delete(self) -> exp.Delete: 3101 # This handles MySQL's "Multiple-Table Syntax" 3102 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3103 tables = None 3104 if not self._match(TokenType.FROM, advance=False): 3105 tables = self._parse_csv(self._parse_table) or None 3106 3107 returning = self._parse_returning() 3108 3109 return self.expression( 3110 exp.Delete, 3111 tables=tables, 3112 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3113 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3114 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3115 where=self._parse_where(), 3116 returning=returning or self._parse_returning(), 3117 limit=self._parse_limit(), 3118 ) 3119 3120 def _parse_update(self) -> exp.Update: 3121 kwargs: t.Dict[str, t.Any] = { 3122 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3123 } 3124 while self._curr: 3125 if self._match(TokenType.SET): 3126 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3127 elif self._match(TokenType.RETURNING, advance=False): 3128 kwargs["returning"] = self._parse_returning() 3129 elif self._match(TokenType.FROM, advance=False): 3130 kwargs["from"] = self._parse_from(joins=True) 3131 elif self._match(TokenType.WHERE, advance=False): 3132 kwargs["where"] = self._parse_where() 3133 elif self._match(TokenType.ORDER_BY, advance=False): 3134 kwargs["order"] = self._parse_order() 3135 elif self._match(TokenType.LIMIT, advance=False): 3136 kwargs["limit"] = self._parse_limit() 3137 else: 3138 break 3139 3140 return self.expression(exp.Update, **kwargs) 3141 3142 def _parse_use(self) -> exp.Use: 3143 return self.expression( 3144 exp.Use, 3145 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3146 this=self._parse_table(schema=False), 3147 ) 3148 3149 def _parse_uncache(self) -> exp.Uncache: 3150 if not self._match(TokenType.TABLE): 3151 self.raise_error("Expecting TABLE after 
UNCACHE") 3152 3153 return self.expression( 3154 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3155 ) 3156 3157 def _parse_cache(self) -> exp.Cache: 3158 lazy = self._match_text_seq("LAZY") 3159 self._match(TokenType.TABLE) 3160 table = self._parse_table(schema=True) 3161 3162 options = [] 3163 if self._match_text_seq("OPTIONS"): 3164 self._match_l_paren() 3165 k = self._parse_string() 3166 self._match(TokenType.EQ) 3167 v = self._parse_string() 3168 options = [k, v] 3169 self._match_r_paren() 3170 3171 self._match(TokenType.ALIAS) 3172 return self.expression( 3173 exp.Cache, 3174 this=table, 3175 lazy=lazy, 3176 options=options, 3177 expression=self._parse_select(nested=True), 3178 ) 3179 3180 def _parse_partition(self) -> t.Optional[exp.Partition]: 3181 if not self._match_texts(self.PARTITION_KEYWORDS): 3182 return None 3183 3184 return self.expression( 3185 exp.Partition, 3186 subpartition=self._prev.text.upper() == "SUBPARTITION", 3187 expressions=self._parse_wrapped_csv(self._parse_assignment), 3188 ) 3189 3190 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3191 def _parse_value_expression() -> t.Optional[exp.Expression]: 3192 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3193 return exp.var(self._prev.text.upper()) 3194 return self._parse_expression() 3195 3196 if self._match(TokenType.L_PAREN): 3197 expressions = self._parse_csv(_parse_value_expression) 3198 self._match_r_paren() 3199 return self.expression(exp.Tuple, expressions=expressions) 3200 3201 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3202 expression = self._parse_expression() 3203 if expression: 3204 return self.expression(exp.Tuple, expressions=[expression]) 3205 return None 3206 3207 def _parse_projections(self) -> t.List[exp.Expression]: 3208 return self._parse_expressions() 3209 3210 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3211 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3212 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3213 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3214 ) 3215 elif self._match(TokenType.FROM): 3216 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3217 # Support parentheses for duckdb FROM-first syntax 3218 select = self._parse_select(from_=from_) 3219 if select: 3220 if not select.args.get("from"): 3221 select.set("from", from_) 3222 this = select 3223 else: 3224 this = exp.select("*").from_(t.cast(exp.From, from_)) 3225 else: 3226 this = ( 3227 self._parse_table(consume_pipe=True) 3228 if table 3229 else self._parse_select(nested=True, parse_set_operation=False) 3230 ) 3231 3232 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3233 # in case a modifier (e.g. 
join) is following 3234 if table and isinstance(this, exp.Values) and this.alias: 3235 alias = this.args["alias"].pop() 3236 this = exp.Table(this=this, alias=alias) 3237 3238 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3239 3240 return this 3241 3242 def _parse_select( 3243 self, 3244 nested: bool = False, 3245 table: bool = False, 3246 parse_subquery_alias: bool = True, 3247 parse_set_operation: bool = True, 3248 consume_pipe: bool = True, 3249 from_: t.Optional[exp.From] = None, 3250 ) -> t.Optional[exp.Expression]: 3251 query = self._parse_select_query( 3252 nested=nested, 3253 table=table, 3254 parse_subquery_alias=parse_subquery_alias, 3255 parse_set_operation=parse_set_operation, 3256 ) 3257 3258 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3259 if not query and from_: 3260 query = exp.select("*").from_(from_) 3261 if isinstance(query, exp.Query): 3262 query = self._parse_pipe_syntax_query(query) 3263 query = query.subquery(copy=False) if query and table else query 3264 3265 return query 3266 3267 def _parse_select_query( 3268 self, 3269 nested: bool = False, 3270 table: bool = False, 3271 parse_subquery_alias: bool = True, 3272 parse_set_operation: bool = True, 3273 ) -> t.Optional[exp.Expression]: 3274 cte = self._parse_with() 3275 3276 if cte: 3277 this = self._parse_statement() 3278 3279 if not this: 3280 self.raise_error("Failed to parse any statement following CTE") 3281 return cte 3282 3283 if "with" in this.arg_types: 3284 this.set("with", cte) 3285 else: 3286 self.raise_error(f"{this.key} does not support CTE") 3287 this = cte 3288 3289 return this 3290 3291 # duckdb supports leading with FROM x 3292 from_ = ( 3293 self._parse_from(consume_pipe=True) 3294 if self._match(TokenType.FROM, advance=False) 3295 else None 3296 ) 3297 3298 if self._match(TokenType.SELECT): 3299 comments = self._prev_comments 3300 3301 hint = self._parse_hint() 3302 3303 if self._next and not self._next.token_type == TokenType.DOT: 3304 all_ = self._match(TokenType.ALL) 3305 distinct = self._match_set(self.DISTINCT_TOKENS) 3306 else: 3307 all_, distinct = None, None 3308 3309 kind = ( 3310 self._match(TokenType.ALIAS) 3311 and self._match_texts(("STRUCT", "VALUE")) 3312 and self._prev.text.upper() 3313 ) 3314 3315 if distinct: 3316 distinct = self.expression( 3317 exp.Distinct, 3318 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3319 ) 3320 3321 if all_ and distinct: 3322 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3323 3324 operation_modifiers = [] 3325 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3326 operation_modifiers.append(exp.var(self._prev.text.upper())) 3327 3328 limit = self._parse_limit(top=True) 3329 projections = self._parse_projections() 3330 3331 this = self.expression( 3332 exp.Select, 3333 kind=kind, 3334 hint=hint, 3335 distinct=distinct, 3336 expressions=projections, 3337 limit=limit, 3338 operation_modifiers=operation_modifiers or None, 3339 ) 3340 this.comments = comments 3341 3342 into = self._parse_into() 3343 if into: 3344 this.set("into", into) 3345 3346 if not from_: 3347 from_ = self._parse_from() 3348 3349 if from_: 3350 this.set("from", from_) 3351 3352 this = self._parse_query_modifiers(this) 3353 elif (table or nested) and self._match(TokenType.L_PAREN): 3354 this = self._parse_wrapped_select(table=table) 3355 3356 # We return early here so that the UNION isn't attached to the subquery by the 3357 # following call to _parse_set_operations, but 
instead becomes the parent node 3358 self._match_r_paren() 3359 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3360 elif self._match(TokenType.VALUES, advance=False): 3361 this = self._parse_derived_table_values() 3362 elif from_: 3363 this = exp.select("*").from_(from_.this, copy=False) 3364 elif self._match(TokenType.SUMMARIZE): 3365 table = self._match(TokenType.TABLE) 3366 this = self._parse_select() or self._parse_string() or self._parse_table() 3367 return self.expression(exp.Summarize, this=this, table=table) 3368 elif self._match(TokenType.DESCRIBE): 3369 this = self._parse_describe() 3370 elif self._match_text_seq("STREAM"): 3371 this = self._parse_function() 3372 if this: 3373 this = self.expression(exp.Stream, this=this) 3374 else: 3375 self._retreat(self._index - 1) 3376 else: 3377 this = None 3378 3379 return self._parse_set_operations(this) if parse_set_operation else this 3380 3381 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3382 self._match_text_seq("SEARCH") 3383 3384 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3385 3386 if not kind: 3387 return None 3388 3389 self._match_text_seq("FIRST", "BY") 3390 3391 return self.expression( 3392 exp.RecursiveWithSearch, 3393 kind=kind, 3394 this=self._parse_id_var(), 3395 expression=self._match_text_seq("SET") and self._parse_id_var(), 3396 using=self._match_text_seq("USING") and self._parse_id_var(), 3397 ) 3398 3399 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3400 if not skip_with_token and not self._match(TokenType.WITH): 3401 return None 3402 3403 comments = self._prev_comments 3404 recursive = self._match(TokenType.RECURSIVE) 3405 3406 last_comments = None 3407 expressions = [] 3408 while True: 3409 cte = self._parse_cte() 3410 if isinstance(cte, exp.CTE): 3411 expressions.append(cte) 3412 if last_comments: 3413 cte.add_comments(last_comments) 3414 3415 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3416 break 3417 else: 3418 self._match(TokenType.WITH) 3419 3420 last_comments = self._prev_comments 3421 3422 return self.expression( 3423 exp.With, 3424 comments=comments, 3425 expressions=expressions, 3426 recursive=recursive, 3427 search=self._parse_recursive_with_search(), 3428 ) 3429 3430 def _parse_cte(self) -> t.Optional[exp.CTE]: 3431 index = self._index 3432 3433 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3434 if not alias or not alias.this: 3435 self.raise_error("Expected CTE to have alias") 3436 3437 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3438 self._retreat(index) 3439 return None 3440 3441 comments = self._prev_comments 3442 3443 if self._match_text_seq("NOT", "MATERIALIZED"): 3444 materialized = False 3445 elif self._match_text_seq("MATERIALIZED"): 3446 materialized = True 3447 else: 3448 materialized = None 3449 3450 cte = self.expression( 3451 exp.CTE, 3452 this=self._parse_wrapped(self._parse_statement), 3453 alias=alias, 3454 materialized=materialized, 3455 comments=comments, 3456 ) 3457 3458 values = cte.this 3459 if isinstance(values, exp.Values): 3460 if values.alias: 3461 cte.set("this", exp.select("*").from_(values)) 3462 else: 3463 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3464 3465 return cte 3466 3467 def _parse_table_alias( 3468 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3469 ) -> t.Optional[exp.TableAlias]: 3470 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3471 # so this section tries to parse the clause version and if it fails, it treats the token 3472 # as an identifier (alias) 3473 if self._can_parse_limit_or_offset(): 3474 return None 3475 3476 any_token = self._match(TokenType.ALIAS) 3477 alias = ( 3478 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3479 or self._parse_string_as_identifier() 3480 ) 3481 3482 index = self._index 3483 if self._match(TokenType.L_PAREN): 3484 columns = self._parse_csv(self._parse_function_parameter) 3485 self._match_r_paren() if columns else self._retreat(index) 3486 else: 3487 columns = None 3488 3489 if not alias and not columns: 3490 return None 3491 3492 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3493 3494 # We bubble up comments from the Identifier to the TableAlias 3495 if isinstance(alias, exp.Identifier): 3496 table_alias.add_comments(alias.pop_comments()) 3497 3498 return table_alias 3499 3500 def _parse_subquery( 3501 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3502 ) -> t.Optional[exp.Subquery]: 3503 if not this: 3504 return None 3505 3506 return self.expression( 3507 exp.Subquery, 3508 this=this, 3509 pivots=self._parse_pivots(), 3510 alias=self._parse_table_alias() if parse_alias else None, 3511 sample=self._parse_table_sample(), 3512 ) 3513 3514 def _implicit_unnests_to_explicit(self, this: E) -> E: 3515 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3516 3517 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3518 for i, join in enumerate(this.args.get("joins") or []): 3519 table = join.this 3520 normalized_table = table.copy() 3521 normalized_table.meta["maybe_column"] = True 3522 normalized_table = _norm(normalized_table, dialect=self.dialect) 3523 3524 if isinstance(table, exp.Table) and not join.args.get("on"): 3525 if normalized_table.parts[0].name in refs: 3526 table_as_column = table.to_column() 3527 unnest = exp.Unnest(expressions=[table_as_column]) 3528 3529 # Table.to_column creates a parent Alias node that we want to convert to 3530 # a TableAlias and attach to the Unnest, so it matches the parser's output 3531 if isinstance(table.args.get("alias"), exp.TableAlias): 3532 table_as_column.replace(table_as_column.this) 3533 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3534 3535 table.replace(unnest) 3536 3537 refs.add(normalized_table.alias_or_name) 3538 3539 return this 3540 3541 @t.overload 3542 def _parse_query_modifiers(self, this: E) -> E: ... 3543 3544 @t.overload 3545 def _parse_query_modifiers(self, this: None) -> None: ... 
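# A minimal usage sketch (assuming only the public parse_one API) of what the
# implementation below produces: each matched modifier clause is attached onto
# the query node under its own arg key.
#
#   import sqlglot
#   ast = sqlglot.parse_one("SELECT a FROM t WHERE a > 1 ORDER BY a LIMIT 10")
#   [k for k in ("where", "order", "limit") if ast.args.get(k)]
#   # ['where', 'order', 'limit']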
3546 3547 def _parse_query_modifiers(self, this): 3548 if isinstance(this, self.MODIFIABLES): 3549 for join in self._parse_joins(): 3550 this.append("joins", join) 3551 for lateral in iter(self._parse_lateral, None): 3552 this.append("laterals", lateral) 3553 3554 while True: 3555 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3556 modifier_token = self._curr 3557 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3558 key, expression = parser(self) 3559 3560 if expression: 3561 if this.args.get(key): 3562 self.raise_error( 3563 f"Found multiple '{modifier_token.text.upper()}' clauses", 3564 token=modifier_token, 3565 ) 3566 3567 this.set(key, expression) 3568 if key == "limit": 3569 offset = expression.args.pop("offset", None) 3570 3571 if offset: 3572 offset = exp.Offset(expression=offset) 3573 this.set("offset", offset) 3574 3575 limit_by_expressions = expression.expressions 3576 expression.set("expressions", None) 3577 offset.set("expressions", limit_by_expressions) 3578 continue 3579 break 3580 3581 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3582 this = self._implicit_unnests_to_explicit(this) 3583 3584 return this 3585 3586 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3587 start = self._curr 3588 while self._curr: 3589 self._advance() 3590 3591 end = self._tokens[self._index - 1] 3592 return exp.Hint(expressions=[self._find_sql(start, end)]) 3593 3594 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3595 return self._parse_function_call() 3596 3597 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3598 start_index = self._index 3599 should_fallback_to_string = False 3600 3601 hints = [] 3602 try: 3603 for hint in iter( 3604 lambda: self._parse_csv( 3605 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3606 ), 3607 [], 3608 ): 3609 hints.extend(hint) 3610 except ParseError: 3611 should_fallback_to_string = True 3612 3613 if should_fallback_to_string or self._curr: 3614 self._retreat(start_index) 3615 return self._parse_hint_fallback_to_string() 3616 3617 return self.expression(exp.Hint, expressions=hints) 3618 3619 def _parse_hint(self) -> t.Optional[exp.Hint]: 3620 if self._match(TokenType.HINT) and self._prev_comments: 3621 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3622 3623 return None 3624 3625 def _parse_into(self) -> t.Optional[exp.Into]: 3626 if not self._match(TokenType.INTO): 3627 return None 3628 3629 temp = self._match(TokenType.TEMPORARY) 3630 unlogged = self._match_text_seq("UNLOGGED") 3631 self._match(TokenType.TABLE) 3632 3633 return self.expression( 3634 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3635 ) 3636 3637 def _parse_from( 3638 self, 3639 joins: bool = False, 3640 skip_from_token: bool = False, 3641 consume_pipe: bool = False, 3642 ) -> t.Optional[exp.From]: 3643 if not skip_from_token and not self._match(TokenType.FROM): 3644 return None 3645 3646 return self.expression( 3647 exp.From, 3648 comments=self._prev_comments, 3649 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3650 ) 3651 3652 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3653 return self.expression( 3654 exp.MatchRecognizeMeasure, 3655 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3656 this=self._parse_expression(), 3657 ) 3658 3659 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3660 if not 
self._match(TokenType.MATCH_RECOGNIZE): 3661 return None 3662 3663 self._match_l_paren() 3664 3665 partition = self._parse_partition_by() 3666 order = self._parse_order() 3667 3668 measures = ( 3669 self._parse_csv(self._parse_match_recognize_measure) 3670 if self._match_text_seq("MEASURES") 3671 else None 3672 ) 3673 3674 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3675 rows = exp.var("ONE ROW PER MATCH") 3676 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3677 text = "ALL ROWS PER MATCH" 3678 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3679 text += " SHOW EMPTY MATCHES" 3680 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3681 text += " OMIT EMPTY MATCHES" 3682 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3683 text += " WITH UNMATCHED ROWS" 3684 rows = exp.var(text) 3685 else: 3686 rows = None 3687 3688 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3689 text = "AFTER MATCH SKIP" 3690 if self._match_text_seq("PAST", "LAST", "ROW"): 3691 text += " PAST LAST ROW" 3692 elif self._match_text_seq("TO", "NEXT", "ROW"): 3693 text += " TO NEXT ROW" 3694 elif self._match_text_seq("TO", "FIRST"): 3695 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3696 elif self._match_text_seq("TO", "LAST"): 3697 text += f" TO LAST {self._advance_any().text}" # type: ignore 3698 after = exp.var(text) 3699 else: 3700 after = None 3701 3702 if self._match_text_seq("PATTERN"): 3703 self._match_l_paren() 3704 3705 if not self._curr: 3706 self.raise_error("Expecting )", self._curr) 3707 3708 paren = 1 3709 start = self._curr 3710 3711 while self._curr and paren > 0: 3712 if self._curr.token_type == TokenType.L_PAREN: 3713 paren += 1 3714 if self._curr.token_type == TokenType.R_PAREN: 3715 paren -= 1 3716 3717 end = self._prev 3718 self._advance() 3719 3720 if paren > 0: 3721 self.raise_error("Expecting )", self._curr) 3722 3723 pattern = exp.var(self._find_sql(start, end)) 3724 else: 3725 pattern = None 3726 3727 define = ( 3728 self._parse_csv(self._parse_name_as_expression) 3729 if self._match_text_seq("DEFINE") 3730 else None 3731 ) 3732 3733 self._match_r_paren() 3734 3735 return self.expression( 3736 exp.MatchRecognize, 3737 partition_by=partition, 3738 order=order, 3739 measures=measures, 3740 rows=rows, 3741 after=after, 3742 pattern=pattern, 3743 define=define, 3744 alias=self._parse_table_alias(), 3745 ) 3746 3747 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3748 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3749 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3750 cross_apply = False 3751 3752 if cross_apply is not None: 3753 this = self._parse_select(table=True) 3754 view = None 3755 outer = None 3756 elif self._match(TokenType.LATERAL): 3757 this = self._parse_select(table=True) 3758 view = self._match(TokenType.VIEW) 3759 outer = self._match(TokenType.OUTER) 3760 else: 3761 return None 3762 3763 if not this: 3764 this = ( 3765 self._parse_unnest() 3766 or self._parse_function() 3767 or self._parse_id_var(any_token=False) 3768 ) 3769 3770 while self._match(TokenType.DOT): 3771 this = exp.Dot( 3772 this=this, 3773 expression=self._parse_function() or self._parse_id_var(any_token=False), 3774 ) 3775 3776 ordinality: t.Optional[bool] = None 3777 3778 if view: 3779 table = self._parse_id_var(any_token=False) 3780 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3781 table_alias: t.Optional[exp.TableAlias] = self.expression( 3782 exp.TableAlias, 
this=table, columns=columns 3783 ) 3784 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3785 # We move the alias from the lateral's child node to the lateral itself 3786 table_alias = this.args["alias"].pop() 3787 else: 3788 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3789 table_alias = self._parse_table_alias() 3790 3791 return self.expression( 3792 exp.Lateral, 3793 this=this, 3794 view=view, 3795 outer=outer, 3796 alias=table_alias, 3797 cross_apply=cross_apply, 3798 ordinality=ordinality, 3799 ) 3800 3801 def _parse_join_parts( 3802 self, 3803 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3804 return ( 3805 self._match_set(self.JOIN_METHODS) and self._prev, 3806 self._match_set(self.JOIN_SIDES) and self._prev, 3807 self._match_set(self.JOIN_KINDS) and self._prev, 3808 ) 3809 3810 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3811 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3812 this = self._parse_column() 3813 if isinstance(this, exp.Column): 3814 return this.this 3815 return this 3816 3817 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3818 3819 def _parse_join( 3820 self, skip_join_token: bool = False, parse_bracket: bool = False 3821 ) -> t.Optional[exp.Join]: 3822 if self._match(TokenType.COMMA): 3823 table = self._try_parse(self._parse_table) 3824 cross_join = self.expression(exp.Join, this=table) if table else None 3825 3826 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3827 cross_join.set("kind", "CROSS") 3828 3829 return cross_join 3830 3831 index = self._index 3832 method, side, kind = self._parse_join_parts() 3833 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3834 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3835 join_comments = self._prev_comments 3836 3837 if not skip_join_token and not join: 3838 self._retreat(index) 3839 kind = None 3840 method = None 3841 side = None 3842 3843 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3844 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3845 3846 if not skip_join_token and not join and not outer_apply and not cross_apply: 3847 return None 3848 3849 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3850 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3851 kwargs["expressions"] = self._parse_csv( 3852 lambda: self._parse_table(parse_bracket=parse_bracket) 3853 ) 3854 3855 if method: 3856 kwargs["method"] = method.text 3857 if side: 3858 kwargs["side"] = side.text 3859 if kind: 3860 kwargs["kind"] = kind.text 3861 if hint: 3862 kwargs["hint"] = hint 3863 3864 if self._match(TokenType.MATCH_CONDITION): 3865 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3866 3867 if self._match(TokenType.ON): 3868 kwargs["on"] = self._parse_assignment() 3869 elif self._match(TokenType.USING): 3870 kwargs["using"] = self._parse_using_identifiers() 3871 elif ( 3872 not method 3873 and not (outer_apply or cross_apply) 3874 and not isinstance(kwargs["this"], exp.Unnest) 3875 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3876 ): 3877 index = self._index 3878 joins: t.Optional[list] = list(self._parse_joins()) 3879 3880 if joins and self._match(TokenType.ON): 3881 kwargs["on"] = self._parse_assignment() 3882 elif joins and self._match(TokenType.USING): 3883 kwargs["using"] = 
self._parse_using_identifiers() 3884 else: 3885 joins = None 3886 self._retreat(index) 3887 3888 kwargs["this"].set("joins", joins if joins else None) 3889 3890 kwargs["pivots"] = self._parse_pivots() 3891 3892 comments = [c for token in (method, side, kind) if token for c in token.comments] 3893 comments = (join_comments or []) + comments 3894 3895 if ( 3896 self.ADD_JOIN_ON_TRUE 3897 and not kwargs.get("on") 3898 and not kwargs.get("using") 3899 and not kwargs.get("method") 3900 and kwargs.get("kind") in (None, "INNER", "OUTER") 3901 ): 3902 kwargs["on"] = exp.true() 3903 3904 return self.expression(exp.Join, comments=comments, **kwargs) 3905 3906 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3907 this = self._parse_assignment() 3908 3909 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3910 return this 3911 3912 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3913 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3914 3915 return this 3916 3917 def _parse_index_params(self) -> exp.IndexParameters: 3918 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3919 3920 if self._match(TokenType.L_PAREN, advance=False): 3921 columns = self._parse_wrapped_csv(self._parse_with_operator) 3922 else: 3923 columns = None 3924 3925 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3926 partition_by = self._parse_partition_by() 3927 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3928 tablespace = ( 3929 self._parse_var(any_token=True) 3930 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3931 else None 3932 ) 3933 where = self._parse_where() 3934 3935 on = self._parse_field() if self._match(TokenType.ON) else None 3936 3937 return self.expression( 3938 exp.IndexParameters, 3939 using=using, 3940 columns=columns, 3941 include=include, 3942 partition_by=partition_by, 3943 where=where, 3944 with_storage=with_storage, 3945 tablespace=tablespace, 3946 on=on, 3947 ) 3948 3949 def _parse_index( 3950 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3951 ) -> t.Optional[exp.Index]: 3952 if index or anonymous: 3953 unique = None 3954 primary = None 3955 amp = None 3956 3957 self._match(TokenType.ON) 3958 self._match(TokenType.TABLE) # hive 3959 table = self._parse_table_parts(schema=True) 3960 else: 3961 unique = self._match(TokenType.UNIQUE) 3962 primary = self._match_text_seq("PRIMARY") 3963 amp = self._match_text_seq("AMP") 3964 3965 if not self._match(TokenType.INDEX): 3966 return None 3967 3968 index = self._parse_id_var() 3969 table = None 3970 3971 params = self._parse_index_params() 3972 3973 return self.expression( 3974 exp.Index, 3975 this=index, 3976 table=table, 3977 unique=unique, 3978 primary=primary, 3979 amp=amp, 3980 params=params, 3981 ) 3982 3983 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3984 hints: t.List[exp.Expression] = [] 3985 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3986 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3987 hints.append( 3988 self.expression( 3989 exp.WithTableHint, 3990 expressions=self._parse_csv( 3991 lambda: self._parse_function() or self._parse_var(any_token=True) 3992 ), 3993 ) 3994 ) 3995 self._match_r_paren() 3996 else: 3997 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3998 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3999 hint = 
exp.IndexTableHint(this=self._prev.text.upper()) 4000 4001 self._match_set((TokenType.INDEX, TokenType.KEY)) 4002 if self._match(TokenType.FOR): 4003 hint.set("target", self._advance_any() and self._prev.text.upper()) 4004 4005 hint.set("expressions", self._parse_wrapped_id_vars()) 4006 hints.append(hint) 4007 4008 return hints or None 4009 4010 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4011 return ( 4012 (not schema and self._parse_function(optional_parens=False)) 4013 or self._parse_id_var(any_token=False) 4014 or self._parse_string_as_identifier() 4015 or self._parse_placeholder() 4016 ) 4017 4018 def _parse_table_parts( 4019 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4020 ) -> exp.Table: 4021 catalog = None 4022 db = None 4023 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4024 4025 while self._match(TokenType.DOT): 4026 if catalog: 4027 # This allows nesting the table in arbitrarily many dot expressions if needed 4028 table = self.expression( 4029 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4030 ) 4031 else: 4032 catalog = db 4033 db = table 4034 # "" used for tsql FROM a..b case 4035 table = self._parse_table_part(schema=schema) or "" 4036 4037 if ( 4038 wildcard 4039 and self._is_connected() 4040 and (isinstance(table, exp.Identifier) or not table) 4041 and self._match(TokenType.STAR) 4042 ): 4043 if isinstance(table, exp.Identifier): 4044 table.args["this"] += "*" 4045 else: 4046 table = exp.Identifier(this="*") 4047 4048 # We bubble up comments from the Identifier to the Table 4049 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4050 4051 if is_db_reference: 4052 catalog = db 4053 db = table 4054 table = None 4055 4056 if not table and not is_db_reference: 4057 self.raise_error(f"Expected table name but got {self._curr}") 4058 if not db and is_db_reference: 4059 self.raise_error(f"Expected database name but got {self._curr}") 4060 4061 table = self.expression( 4062 exp.Table, 4063 comments=comments, 4064 this=table, 4065 db=db, 4066 catalog=catalog, 4067 ) 4068 4069 changes = self._parse_changes() 4070 if changes: 4071 table.set("changes", changes) 4072 4073 at_before = self._parse_historical_data() 4074 if at_before: 4075 table.set("when", at_before) 4076 4077 pivots = self._parse_pivots() 4078 if pivots: 4079 table.set("pivots", pivots) 4080 4081 return table 4082 4083 def _parse_table( 4084 self, 4085 schema: bool = False, 4086 joins: bool = False, 4087 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4088 parse_bracket: bool = False, 4089 is_db_reference: bool = False, 4090 parse_partition: bool = False, 4091 consume_pipe: bool = False, 4092 ) -> t.Optional[exp.Expression]: 4093 lateral = self._parse_lateral() 4094 if lateral: 4095 return lateral 4096 4097 unnest = self._parse_unnest() 4098 if unnest: 4099 return unnest 4100 4101 values = self._parse_derived_table_values() 4102 if values: 4103 return values 4104 4105 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4106 if subquery: 4107 if not subquery.args.get("pivots"): 4108 subquery.set("pivots", self._parse_pivots()) 4109 return subquery 4110 4111 bracket = parse_bracket and self._parse_bracket(None) 4112 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4113 4114 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4115 self._parse_table 4116 ) 4117 rows_from = 
self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4118 4119 only = self._match(TokenType.ONLY) 4120 4121 this = t.cast( 4122 exp.Expression, 4123 bracket 4124 or rows_from 4125 or self._parse_bracket( 4126 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4127 ), 4128 ) 4129 4130 if only: 4131 this.set("only", only) 4132 4133 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4134 self._match_text_seq("*") 4135 4136 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4137 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4138 this.set("partition", self._parse_partition()) 4139 4140 if schema: 4141 return self._parse_schema(this=this) 4142 4143 version = self._parse_version() 4144 4145 if version: 4146 this.set("version", version) 4147 4148 if self.dialect.ALIAS_POST_TABLESAMPLE: 4149 this.set("sample", self._parse_table_sample()) 4150 4151 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4152 if alias: 4153 this.set("alias", alias) 4154 4155 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4156 return self.expression( 4157 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4158 ) 4159 4160 this.set("hints", self._parse_table_hints()) 4161 4162 if not this.args.get("pivots"): 4163 this.set("pivots", self._parse_pivots()) 4164 4165 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4166 this.set("sample", self._parse_table_sample()) 4167 4168 if joins: 4169 for join in self._parse_joins(): 4170 this.append("joins", join) 4171 4172 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4173 this.set("ordinality", True) 4174 this.set("alias", self._parse_table_alias()) 4175 4176 return this 4177 4178 def _parse_version(self) -> t.Optional[exp.Version]: 4179 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4180 this = "TIMESTAMP" 4181 elif self._match(TokenType.VERSION_SNAPSHOT): 4182 this = "VERSION" 4183 else: 4184 return None 4185 4186 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4187 kind = self._prev.text.upper() 4188 start = self._parse_bitwise() 4189 self._match_texts(("TO", "AND")) 4190 end = self._parse_bitwise() 4191 expression: t.Optional[exp.Expression] = self.expression( 4192 exp.Tuple, expressions=[start, end] 4193 ) 4194 elif self._match_text_seq("CONTAINED", "IN"): 4195 kind = "CONTAINED IN" 4196 expression = self.expression( 4197 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4198 ) 4199 elif self._match(TokenType.ALL): 4200 kind = "ALL" 4201 expression = None 4202 else: 4203 self._match_text_seq("AS", "OF") 4204 kind = "AS OF" 4205 expression = self._parse_type() 4206 4207 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4208 4209 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4210 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4211 index = self._index 4212 historical_data = None 4213 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4214 this = self._prev.text.upper() 4215 kind = ( 4216 self._match(TokenType.L_PAREN) 4217 and self._match_texts(self.HISTORICAL_DATA_KIND) 4218 and self._prev.text.upper() 4219 ) 4220 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4221 4222 if expression: 4223 self._match_r_paren() 4224 historical_data = self.expression( 4225 exp.HistoricalData, this=this, kind=kind, expression=expression 4226 ) 4227 else: 4228 
self._retreat(index) 4229 4230 return historical_data 4231 4232 def _parse_changes(self) -> t.Optional[exp.Changes]: 4233 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4234 return None 4235 4236 information = self._parse_var(any_token=True) 4237 self._match_r_paren() 4238 4239 return self.expression( 4240 exp.Changes, 4241 information=information, 4242 at_before=self._parse_historical_data(), 4243 end=self._parse_historical_data(), 4244 ) 4245 4246 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4247 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4248 return None 4249 4250 self._advance() 4251 4252 expressions = self._parse_wrapped_csv(self._parse_equality) 4253 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4254 4255 alias = self._parse_table_alias() if with_alias else None 4256 4257 if alias: 4258 if self.dialect.UNNEST_COLUMN_ONLY: 4259 if alias.args.get("columns"): 4260 self.raise_error("Unexpected extra column alias in unnest.") 4261 4262 alias.set("columns", [alias.this]) 4263 alias.set("this", None) 4264 4265 columns = alias.args.get("columns") or [] 4266 if offset and len(expressions) < len(columns): 4267 offset = columns.pop() 4268 4269 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4270 self._match(TokenType.ALIAS) 4271 offset = self._parse_id_var( 4272 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4273 ) or exp.to_identifier("offset") 4274 4275 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4276 4277 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4278 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4279 if not is_derived and not ( 4280 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4281 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4282 ): 4283 return None 4284 4285 expressions = self._parse_csv(self._parse_value) 4286 alias = self._parse_table_alias() 4287 4288 if is_derived: 4289 self._match_r_paren() 4290 4291 return self.expression( 4292 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4293 ) 4294 4295 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4296 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4297 as_modifier and self._match_text_seq("USING", "SAMPLE") 4298 ): 4299 return None 4300 4301 bucket_numerator = None 4302 bucket_denominator = None 4303 bucket_field = None 4304 percent = None 4305 size = None 4306 seed = None 4307 4308 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4309 matched_l_paren = self._match(TokenType.L_PAREN) 4310 4311 if self.TABLESAMPLE_CSV: 4312 num = None 4313 expressions = self._parse_csv(self._parse_primary) 4314 else: 4315 expressions = None 4316 num = ( 4317 self._parse_factor() 4318 if self._match(TokenType.NUMBER, advance=False) 4319 else self._parse_primary() or self._parse_placeholder() 4320 ) 4321 # e.g. Hive's TABLESAMPLE (BUCKET x OUT OF y ON col) 4322 if self._match_text_seq("BUCKET"): 4323 bucket_numerator = self._parse_number() 4324 self._match_text_seq("OUT", "OF") 4325 bucket_denominator = self._parse_number() 4326 self._match(TokenType.ON) 4327 bucket_field = self._parse_field() 4328 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4329 percent = num 4330 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4331 size = num 4332 else: 4333 percent = num 4334 4335 if matched_l_paren: 4336
self._match_r_paren() 4337 4338 if self._match(TokenType.L_PAREN): 4339 method = self._parse_var(upper=True) 4340 seed = self._match(TokenType.COMMA) and self._parse_number() 4341 self._match_r_paren() 4342 elif self._match_texts(("SEED", "REPEATABLE")): 4343 seed = self._parse_wrapped(self._parse_number) 4344 4345 if not method and self.DEFAULT_SAMPLING_METHOD: 4346 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4347 4348 return self.expression( 4349 exp.TableSample, 4350 expressions=expressions, 4351 method=method, 4352 bucket_numerator=bucket_numerator, 4353 bucket_denominator=bucket_denominator, 4354 bucket_field=bucket_field, 4355 percent=percent, 4356 size=size, 4357 seed=seed, 4358 ) 4359 4360 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4361 return list(iter(self._parse_pivot, None)) or None 4362 4363 def _parse_joins(self) -> t.Iterator[exp.Join]: 4364 return iter(self._parse_join, None) 4365 4366 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4367 if not self._match(TokenType.INTO): 4368 return None 4369 4370 return self.expression( 4371 exp.UnpivotColumns, 4372 this=self._match_text_seq("NAME") and self._parse_column(), 4373 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4374 ) 4375 4376 # https://duckdb.org/docs/sql/statements/pivot 4377 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4378 def _parse_on() -> t.Optional[exp.Expression]: 4379 this = self._parse_bitwise() 4380 4381 if self._match(TokenType.IN): 4382 # PIVOT ... ON col IN (row_val1, row_val2) 4383 return self._parse_in(this) 4384 if self._match(TokenType.ALIAS, advance=False): 4385 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4386 return self._parse_alias(this) 4387 4388 return this 4389 4390 this = self._parse_table() 4391 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4392 into = self._parse_unpivot_columns() 4393 using = self._match(TokenType.USING) and self._parse_csv( 4394 lambda: self._parse_alias(self._parse_function()) 4395 ) 4396 group = self._parse_group() 4397 4398 return self.expression( 4399 exp.Pivot, 4400 this=this, 4401 expressions=expressions, 4402 using=using, 4403 group=group, 4404 unpivot=is_unpivot, 4405 into=into, 4406 ) 4407 4408 def _parse_pivot_in(self) -> exp.In: 4409 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4410 this = self._parse_select_or_expression() 4411 4412 self._match(TokenType.ALIAS) 4413 alias = self._parse_bitwise() 4414 if alias: 4415 if isinstance(alias, exp.Column) and not alias.db: 4416 alias = alias.this 4417 return self.expression(exp.PivotAlias, this=this, alias=alias) 4418 4419 return this 4420 4421 value = self._parse_column() 4422 4423 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4424 self.raise_error("Expecting IN (") 4425 4426 if self._match(TokenType.ANY): 4427 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4428 else: 4429 exprs = self._parse_csv(_parse_aliased_expression) 4430 4431 self._match_r_paren() 4432 return self.expression(exp.In, this=value, expressions=exprs) 4433 4434 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4435 func = self._parse_function() 4436 if not func: 4437 if self._prev and self._prev.token_type == TokenType.COMMA: 4438 return None 4439 self.raise_error("Expecting an aggregation function in PIVOT") 4440 4441 return self._parse_alias(func) 4442 4443 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4444 index = 
self._index 4445 include_nulls = None 4446 4447 if self._match(TokenType.PIVOT): 4448 unpivot = False 4449 elif self._match(TokenType.UNPIVOT): 4450 unpivot = True 4451 4452 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4453 if self._match_text_seq("INCLUDE", "NULLS"): 4454 include_nulls = True 4455 elif self._match_text_seq("EXCLUDE", "NULLS"): 4456 include_nulls = False 4457 else: 4458 return None 4459 4460 expressions = [] 4461 4462 if not self._match(TokenType.L_PAREN): 4463 self._retreat(index) 4464 return None 4465 4466 if unpivot: 4467 expressions = self._parse_csv(self._parse_column) 4468 else: 4469 expressions = self._parse_csv(self._parse_pivot_aggregation) 4470 4471 if not expressions: 4472 self.raise_error("Failed to parse PIVOT's aggregation list") 4473 4474 if not self._match(TokenType.FOR): 4475 self.raise_error("Expecting FOR") 4476 4477 fields = [] 4478 while True: 4479 field = self._try_parse(self._parse_pivot_in) 4480 if not field: 4481 break 4482 fields.append(field) 4483 4484 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4485 self._parse_bitwise 4486 ) 4487 4488 group = self._parse_group() 4489 4490 self._match_r_paren() 4491 4492 pivot = self.expression( 4493 exp.Pivot, 4494 expressions=expressions, 4495 fields=fields, 4496 unpivot=unpivot, 4497 include_nulls=include_nulls, 4498 default_on_null=default_on_null, 4499 group=group, 4500 ) 4501 4502 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4503 pivot.set("alias", self._parse_table_alias()) 4504 4505 if not unpivot: 4506 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4507 4508 columns: t.List[exp.Expression] = [] 4509 all_fields = [] 4510 for pivot_field in pivot.fields: 4511 pivot_field_expressions = pivot_field.expressions 4512 4513 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer the columns in this case. 4514 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4515 continue 4516 4517 all_fields.append( 4518 [ 4519 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4520 for fld in pivot_field_expressions 4521 ] 4522 ) 4523 4524 if all_fields: 4525 if names: 4526 all_fields.append(names) 4527 4528 # Generate all possible combinations of the pivot columns 4529 # e.g. PIVOT(sum(...)
as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4530 # generates the product of [[2000, 2010], ['NL', 'US'], ['total']] 4531 for fld_parts_tuple in itertools.product(*all_fields): 4532 fld_parts = list(fld_parts_tuple) 4533 4534 if names and self.PREFIXED_PIVOT_COLUMNS: 4535 # Move the "name" to the front of the list 4536 fld_parts.insert(0, fld_parts.pop(-1)) 4537 4538 columns.append(exp.to_identifier("_".join(fld_parts))) 4539 4540 pivot.set("columns", columns) 4541 4542 return pivot 4543 4544 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4545 return [agg.alias for agg in aggregations if agg.alias] 4546 4547 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4548 if not skip_where_token and not self._match(TokenType.PREWHERE): 4549 return None 4550 4551 return self.expression( 4552 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4553 ) 4554 4555 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4556 if not skip_where_token and not self._match(TokenType.WHERE): 4557 return None 4558 4559 return self.expression( 4560 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4561 ) 4562 4563 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4564 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4565 return None 4566 comments = self._prev_comments 4567 4568 elements: t.Dict[str, t.Any] = defaultdict(list) 4569 4570 if self._match(TokenType.ALL): 4571 elements["all"] = True 4572 elif self._match(TokenType.DISTINCT): 4573 elements["all"] = False 4574 4575 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4576 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4577 4578 while True: 4579 index = self._index 4580 4581 elements["expressions"].extend( 4582 self._parse_csv( 4583 lambda: None 4584 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4585 else self._parse_assignment() 4586 ) 4587 ) 4588 4589 before_with_index = self._index 4590 with_prefix = self._match(TokenType.WITH) 4591 4592 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 4593 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 4594 elements[key].append(cube_or_rollup) 4595 elif grouping_sets := self._parse_grouping_sets(): 4596 elements["grouping_sets"].append(grouping_sets) 4597 elif self._match_text_seq("TOTALS"): 4598 elements["totals"] = True # type: ignore 4599 4600 if before_with_index <= self._index <= before_with_index + 1: 4601 self._retreat(before_with_index) 4602 break 4603 4604 if index == self._index: 4605 break 4606 4607 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4608 4609 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: 4610 if self._match(TokenType.CUBE): 4611 kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube 4612 elif self._match(TokenType.ROLLUP): 4613 kind = exp.Rollup 4614 else: 4615 return None 4616 4617 return self.expression( 4618 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4619 ) 4620 4621 def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]: 4622 if self._match(TokenType.GROUPING_SETS): 4623 return self.expression( 4624 exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set) 4625 ) 4626 return None 4627 4628 def _parse_grouping_set(self)
-> t.Optional[exp.Expression]: 4629 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 4630 4631 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4632 if not skip_having_token and not self._match(TokenType.HAVING): 4633 return None 4634 return self.expression( 4635 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4636 ) 4637 4638 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4639 if not self._match(TokenType.QUALIFY): 4640 return None 4641 return self.expression(exp.Qualify, this=self._parse_assignment()) 4642 4643 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4644 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4645 exp.Prior, this=self._parse_bitwise() 4646 ) 4647 connect = self._parse_assignment() 4648 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4649 return connect 4650 4651 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4652 if skip_start_token: 4653 start = None 4654 elif self._match(TokenType.START_WITH): 4655 start = self._parse_assignment() 4656 else: 4657 return None 4658 4659 self._match(TokenType.CONNECT_BY) 4660 nocycle = self._match_text_seq("NOCYCLE") 4661 connect = self._parse_connect_with_prior() 4662 4663 if not start and self._match(TokenType.START_WITH): 4664 start = self._parse_assignment() 4665 4666 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4667 4668 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4669 this = self._parse_id_var(any_token=True) 4670 if self._match(TokenType.ALIAS): 4671 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4672 return this 4673 4674 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4675 if self._match_text_seq("INTERPOLATE"): 4676 return self._parse_wrapped_csv(self._parse_name_as_expression) 4677 return None 4678 4679 def _parse_order( 4680 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4681 ) -> t.Optional[exp.Expression]: 4682 siblings = None 4683 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4684 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4685 return this 4686 4687 siblings = True 4688 4689 return self.expression( 4690 exp.Order, 4691 comments=self._prev_comments, 4692 this=this, 4693 expressions=self._parse_csv(self._parse_ordered), 4694 siblings=siblings, 4695 ) 4696 4697 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4698 if not self._match(token): 4699 return None 4700 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4701 4702 def _parse_ordered( 4703 self, parse_method: t.Optional[t.Callable] = None 4704 ) -> t.Optional[exp.Ordered]: 4705 this = parse_method() if parse_method else self._parse_assignment() 4706 if not this: 4707 return None 4708 4709 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4710 this = exp.var("ALL") 4711 4712 asc = self._match(TokenType.ASC) 4713 desc = self._match(TokenType.DESC) or (asc and False) 4714 4715 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4716 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4717 4718 nulls_first = is_nulls_first or False 4719 explicitly_null_ordered = is_nulls_first or is_nulls_last 4720 4721 if ( 4722 not explicitly_null_ordered 4723 and ( 4724 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4725 or (desc and 
self.dialect.NULL_ORDERING != "nulls_are_small") 4726 ) 4727 and self.dialect.NULL_ORDERING != "nulls_are_last" 4728 ): 4729 nulls_first = True 4730 4731 if self._match_text_seq("WITH", "FILL"): 4732 with_fill = self.expression( 4733 exp.WithFill, 4734 **{ # type: ignore 4735 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4736 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4737 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4738 "interpolate": self._parse_interpolate(), 4739 }, 4740 ) 4741 else: 4742 with_fill = None 4743 4744 return self.expression( 4745 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4746 ) 4747 4748 def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]: 4749 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 4750 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4751 self._match_text_seq("ONLY") 4752 with_ties = self._match_text_seq("WITH", "TIES") 4753 4754 if not (percent or rows or with_ties): 4755 return None 4756 4757 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4758 4759 def _parse_limit( 4760 self, 4761 this: t.Optional[exp.Expression] = None, 4762 top: bool = False, 4763 skip_limit_token: bool = False, 4764 ) -> t.Optional[exp.Expression]: 4765 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4766 comments = self._prev_comments 4767 if top: 4768 limit_paren = self._match(TokenType.L_PAREN) 4769 expression = self._parse_term() if limit_paren else self._parse_number() 4770 4771 if limit_paren: 4772 self._match_r_paren() 4773 4774 else: 4775 # Parsing LIMIT x% (i.e. x PERCENT) as a term leads to an error, since 4776 # we try to build an exp.Mod expr.
To work around this, we backtrack and instead 4777 # consume the factor, then parse the percentage separately 4778 expression = self._try_parse(self._parse_term) or self._parse_factor() 4779 4780 limit_options = self._parse_limit_options() 4781 4782 if self._match(TokenType.COMMA): 4783 offset = expression 4784 expression = self._parse_term() 4785 else: 4786 offset = None 4787 4788 limit_exp = self.expression( 4789 exp.Limit, 4790 this=this, 4791 expression=expression, 4792 offset=offset, 4793 comments=comments, 4794 limit_options=limit_options, 4795 expressions=self._parse_limit_by(), 4796 ) 4797 4798 return limit_exp 4799 4800 if self._match(TokenType.FETCH): 4801 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4802 direction = self._prev.text.upper() if direction else "FIRST" 4803 4804 count = self._parse_field(tokens=self.FETCH_TOKENS) 4805 4806 return self.expression( 4807 exp.Fetch, 4808 direction=direction, 4809 count=count, 4810 limit_options=self._parse_limit_options(), 4811 ) 4812 4813 return this 4814 4815 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4816 if not self._match(TokenType.OFFSET): 4817 return this 4818 4819 count = self._parse_term() 4820 self._match_set((TokenType.ROW, TokenType.ROWS)) 4821 4822 return self.expression( 4823 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4824 ) 4825 4826 def _can_parse_limit_or_offset(self) -> bool: 4827 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4828 return False 4829 4830 index = self._index 4831 result = bool( 4832 self._try_parse(self._parse_limit, retreat=True) 4833 or self._try_parse(self._parse_offset, retreat=True) 4834 ) 4835 self._retreat(index) 4836 return result 4837 4838 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4839 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4840 4841 def _parse_locks(self) -> t.List[exp.Lock]: 4842 locks = [] 4843 while True: 4844 update, key = None, None 4845 if self._match_text_seq("FOR", "UPDATE"): 4846 update = True 4847 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4848 "LOCK", "IN", "SHARE", "MODE" 4849 ): 4850 update = False 4851 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4852 update, key = False, True 4853 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4854 update, key = True, True 4855 else: 4856 break 4857 4858 expressions = None 4859 if self._match_text_seq("OF"): 4860 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4861 4862 wait: t.Optional[bool | exp.Expression] = None 4863 if self._match_text_seq("NOWAIT"): 4864 wait = True 4865 elif self._match_text_seq("WAIT"): 4866 wait = self._parse_primary() 4867 elif self._match_text_seq("SKIP", "LOCKED"): 4868 wait = False 4869 4870 locks.append( 4871 self.expression( 4872 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4873 ) 4874 ) 4875 4876 return locks 4877 4878 def parse_set_operation( 4879 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4880 ) -> t.Optional[exp.Expression]: 4881 start = self._index 4882 _, side_token, kind_token = self._parse_join_parts() 4883 4884 side = side_token.text if side_token else None 4885 kind = kind_token.text if kind_token else None 4886 4887 if not self._match_set(self.SET_OPERATIONS): 4888 self._retreat(start) 4889 return None 4890 4891 token_type = self._prev.token_type 4892 4893 if token_type == TokenType.UNION: 4894 operation:
t.Type[exp.SetOperation] = exp.Union 4895 elif token_type == TokenType.EXCEPT: 4896 operation = exp.Except 4897 else: 4898 operation = exp.Intersect 4899 4900 comments = self._prev.comments 4901 4902 if self._match(TokenType.DISTINCT): 4903 distinct: t.Optional[bool] = True 4904 elif self._match(TokenType.ALL): 4905 distinct = False 4906 else: 4907 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4908 if distinct is None: 4909 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4910 4911 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4912 "STRICT", "CORRESPONDING" 4913 ) 4914 if self._match_text_seq("CORRESPONDING"): 4915 by_name = True 4916 if not side and not kind: 4917 kind = "INNER" 4918 4919 on_column_list = None 4920 if by_name and self._match_texts(("ON", "BY")): 4921 on_column_list = self._parse_wrapped_csv(self._parse_column) 4922 4923 expression = self._parse_select( 4924 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4925 ) 4926 4927 return self.expression( 4928 operation, 4929 comments=comments, 4930 this=this, 4931 distinct=distinct, 4932 by_name=by_name, 4933 expression=expression, 4934 side=side, 4935 kind=kind, 4936 on=on_column_list, 4937 ) 4938 4939 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4940 while this: 4941 setop = self.parse_set_operation(this) 4942 if not setop: 4943 break 4944 this = setop 4945 4946 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4947 expression = this.expression 4948 4949 if expression: 4950 for arg in self.SET_OP_MODIFIERS: 4951 expr = expression.args.get(arg) 4952 if expr: 4953 this.set(arg, expr.pop()) 4954 4955 return this 4956 4957 def _parse_expression(self) -> t.Optional[exp.Expression]: 4958 return self._parse_alias(self._parse_assignment()) 4959 4960 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4961 this = self._parse_disjunction() 4962 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4963 # This allows us to parse <non-identifier token> := <expr> 4964 this = exp.column( 4965 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4966 ) 4967 4968 while self._match_set(self.ASSIGNMENT): 4969 if isinstance(this, exp.Column) and len(this.parts) == 1: 4970 this = this.this 4971 4972 this = self.expression( 4973 self.ASSIGNMENT[self._prev.token_type], 4974 this=this, 4975 comments=self._prev_comments, 4976 expression=self._parse_assignment(), 4977 ) 4978 4979 return this 4980 4981 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4982 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4983 4984 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4985 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4986 4987 def _parse_equality(self) -> t.Optional[exp.Expression]: 4988 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4989 4990 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4991 return self._parse_tokens(self._parse_range, self.COMPARISON) 4992 4993 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4994 this = this or self._parse_bitwise() 4995 negate = self._match(TokenType.NOT) 4996 4997 if self._match_set(self.RANGE_PARSERS): 4998 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4999 if not expression: 5000 return this 5001 5002 this = expression 5003 elif self._match(TokenType.ISNULL): 
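# Postgres-style ISNULL: `x ISNULL` is normalized to the canonical `x IS NULL`.
# A rough round-trip sketch, assuming the postgres dialect:
#
#   import sqlglot
#   sqlglot.parse_one("SELECT a ISNULL", read="postgres").sql()
#   # 'SELECT a IS NULL'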
5004 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5005 5006 # Postgres supports ISNULL and NOTNULL for conditions. 5007 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5008 if self._match(TokenType.NOTNULL): 5009 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5010 this = self.expression(exp.Not, this=this) 5011 5012 if negate: 5013 this = self._negate_range(this) 5014 5015 if self._match(TokenType.IS): 5016 this = self._parse_is(this) 5017 5018 return this 5019 5020 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5021 if not this: 5022 return this 5023 5024 return self.expression(exp.Not, this=this) 5025 5026 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5027 index = self._index - 1 5028 negate = self._match(TokenType.NOT) 5029 5030 if self._match_text_seq("DISTINCT", "FROM"): 5031 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5032 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5033 5034 if self._match(TokenType.JSON): 5035 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5036 5037 if self._match_text_seq("WITH"): 5038 _with = True 5039 elif self._match_text_seq("WITHOUT"): 5040 _with = False 5041 else: 5042 _with = None 5043 5044 unique = self._match(TokenType.UNIQUE) 5045 self._match_text_seq("KEYS") 5046 expression: t.Optional[exp.Expression] = self.expression( 5047 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5048 ) 5049 else: 5050 expression = self._parse_primary() or self._parse_null() 5051 if not expression: 5052 self._retreat(index) 5053 return None 5054 5055 this = self.expression(exp.Is, this=this, expression=expression) 5056 this = self.expression(exp.Not, this=this) if negate else this 5057 return self._parse_column_ops(this) 5058 5059 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5060 unnest = self._parse_unnest(with_alias=False) 5061 if unnest: 5062 this = self.expression(exp.In, this=this, unnest=unnest) 5063 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5064 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5065 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5066 5067 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5068 this = self.expression( 5069 exp.In, 5070 this=this, 5071 query=self._parse_query_modifiers(query).subquery(copy=False), 5072 ) 5073 else: 5074 this = self.expression(exp.In, this=this, expressions=expressions) 5075 5076 if matched_l_paren: 5077 self._match_r_paren(this) 5078 elif not self._match(TokenType.R_BRACKET, expression=this): 5079 self.raise_error("Expecting ]") 5080 else: 5081 this = self.expression(exp.In, this=this, field=self._parse_column()) 5082 5083 return this 5084 5085 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5086 symmetric = None 5087 if self._match_text_seq("SYMMETRIC"): 5088 symmetric = True 5089 elif self._match_text_seq("ASYMMETRIC"): 5090 symmetric = False 5091 5092 low = self._parse_bitwise() 5093 self._match(TokenType.AND) 5094 high = self._parse_bitwise() 5095 5096 return self.expression( 5097 exp.Between, 5098 this=this, 5099 low=low, 5100 high=high, 5101 symmetric=symmetric, 5102 ) 5103 5104 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5105 if not self._match(TokenType.ESCAPE): 5106 return this 5107 
return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5108 5109 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5110 index = self._index 5111 5112 if not self._match(TokenType.INTERVAL) and match_interval: 5113 return None 5114 5115 if self._match(TokenType.STRING, advance=False): 5116 this = self._parse_primary() 5117 else: 5118 this = self._parse_term() 5119 5120 if not this or ( 5121 isinstance(this, exp.Column) 5122 and not this.table 5123 and not this.this.quoted 5124 and self._curr 5125 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5126 ): 5127 self._retreat(index) 5128 return None 5129 5130 # handle day-time format interval span with omitted units: 5131 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5132 interval_span_units_omitted = None 5133 if ( 5134 this 5135 and this.is_string 5136 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5137 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5138 ): 5139 index = self._index 5140 5141 # Var "TO" Var 5142 first_unit = self._parse_var(any_token=True, upper=True) 5143 second_unit = None 5144 if first_unit and self._match_text_seq("TO"): 5145 second_unit = self._parse_var(any_token=True, upper=True) 5146 5147 interval_span_units_omitted = not (first_unit and second_unit) 5148 5149 self._retreat(index) 5150 5151 unit = ( 5152 None 5153 if interval_span_units_omitted 5154 else ( 5155 self._parse_function() 5156 or ( 5157 not self._match(TokenType.ALIAS, advance=False) 5158 and self._parse_var(any_token=True, upper=True) 5159 ) 5160 ) 5161 ) 5162 5163 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5164 # each INTERVAL expression into this canonical form so it's easy to transpile 5165 if this and this.is_number: 5166 this = exp.Literal.string(this.to_py()) 5167 elif this and this.is_string: 5168 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5169 if parts and unit: 5170 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5171 unit = None 5172 self._retreat(self._index - 1) 5173 5174 if len(parts) == 1: 5175 this = exp.Literal.string(parts[0][0]) 5176 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5177 5178 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5179 unit = self.expression( 5180 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5181 ) 5182 5183 interval = self.expression(exp.Interval, this=this, unit=unit) 5184 5185 index = self._index 5186 self._match(TokenType.PLUS) 5187 5188 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5189 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5190 return self.expression( 5191 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5192 ) 5193 5194 self._retreat(index) 5195 return interval 5196 5197 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5198 this = self._parse_term() 5199 5200 while True: 5201 if self._match_set(self.BITWISE): 5202 this = self.expression( 5203 self.BITWISE[self._prev.token_type], 5204 this=this, 5205 expression=self._parse_term(), 5206 ) 5207 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5208 this = self.expression( 5209 exp.DPipe, 5210 this=this, 5211 expression=self._parse_term(), 5212 safe=not self.dialect.STRICT_STRING_CONCAT, 5213 ) 5214 elif self._match(TokenType.DQMARK): 5215 this = self.expression( 5216 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5217 ) 5218 elif self._match_pair(TokenType.LT, TokenType.LT): 5219 this = self.expression( 5220 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5221 ) 5222 elif self._match_pair(TokenType.GT, TokenType.GT): 5223 this = self.expression( 5224 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5225 ) 5226 else: 5227 break 5228 5229 return this 5230 5231 def _parse_term(self) -> t.Optional[exp.Expression]: 5232 this = self._parse_factor() 5233 5234 while self._match_set(self.TERM): 5235 klass = self.TERM[self._prev.token_type] 5236 comments = self._prev_comments 5237 expression = self._parse_factor() 5238 5239 this = self.expression(klass, this=this, comments=comments, expression=expression) 5240 5241 if isinstance(this, exp.Collate): 5242 expr = this.expression 5243 5244 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5245 # fall back to Identifier / Var 5246 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5247 ident = expr.this 5248 if isinstance(ident, exp.Identifier): 5249 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5250 5251 return this 5252 5253 def _parse_factor(self) -> t.Optional[exp.Expression]: 5254 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5255 this = parse_method() 5256 5257 while self._match_set(self.FACTOR): 5258 klass = self.FACTOR[self._prev.token_type] 5259 comments = self._prev_comments 5260 expression = parse_method() 5261 5262 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5263 self._retreat(self._index - 1) 5264 return this 5265 5266 this = self.expression(klass, this=this, comments=comments, expression=expression) 5267 5268 if isinstance(this, exp.Div): 5269 this.args["typed"] = self.dialect.TYPED_DIVISION 5270 this.args["safe"] = self.dialect.SAFE_DIVISION 5271 5272 return this 5273 5274 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5275 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5276 5277 def _parse_unary(self) -> t.Optional[exp.Expression]: 5278 if self._match_set(self.UNARY_PARSERS): 5279 return self.UNARY_PARSERS[self._prev.token_type](self) 5280 return self._parse_at_time_zone(self._parse_type()) 5281 5282 def _parse_type( 5283 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5284 ) -> t.Optional[exp.Expression]: 5285 interval = parse_interval and self._parse_interval() 5286 if interval: 5287 return interval 5288 5289 index = self._index 5290 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5291 
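# Illustrative sketch (assumes a stock sqlglot install; not part of the parser
# logic itself): a type keyword followed by a string literal falls through to
# the exp.Cast construction below, e.g.
#
#   import sqlglot
#   from sqlglot import exp
#
#   node = sqlglot.parse_one("SELECT DATE '2020-01-01'").find(exp.Cast)
#   # DATE '2020-01-01' is canonicalized to CAST('2020-01-01' AS DATE)
#   assert node is not None and node.to.is_type(exp.DataType.Type.DATE)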
5292 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5293 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5294 if isinstance(data_type, exp.Cast): 5295 # This constructor can contain ops directly after it, for instance struct unnesting: 5296 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5297 return self._parse_column_ops(data_type) 5298 5299 if data_type: 5300 index2 = self._index 5301 this = self._parse_primary() 5302 5303 if isinstance(this, exp.Literal): 5304 literal = this.name 5305 this = self._parse_column_ops(this) 5306 5307 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5308 if parser: 5309 return parser(self, this, data_type) 5310 5311 if ( 5312 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5313 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5314 and TIME_ZONE_RE.search(literal) 5315 ): 5316 data_type = exp.DataType.build("TIMESTAMPTZ") 5317 5318 return self.expression(exp.Cast, this=this, to=data_type) 5319 5320 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5321 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5322 # 5323 # If the index difference here is greater than 1, that means the parser itself must have 5324 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5325 # 5326 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5327 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5328 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5329 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5330 # 5331 # In these cases, we don't really want to return the converted type, but instead retreat 5332 # and try to parse a Column or Identifier in the section below. 
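# Concretely (a sketch, assuming a stock sqlglot install): for an explicit cast
# the precision and scale survive as the DataType's "expressions", e.g.
#
#   import sqlglot
#
#   cast = sqlglot.parse_one("CAST(x AS DECIMAL(38, 0))")
#   assert len(cast.to.expressions) == 2  # one DataTypeParam each for 38 and 0
#   assert cast.sql() == "CAST(x AS DECIMAL(38, 0))"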
5333 if data_type.expressions and index2 - index > 1: 5334 self._retreat(index2) 5335 return self._parse_column_ops(data_type) 5336 5337 self._retreat(index) 5338 5339 if fallback_to_identifier: 5340 return self._parse_id_var() 5341 5342 this = self._parse_column() 5343 return this and self._parse_column_ops(this) 5344 5345 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5346 this = self._parse_type() 5347 if not this: 5348 return None 5349 5350 if isinstance(this, exp.Column) and not this.table: 5351 this = exp.var(this.name.upper()) 5352 5353 return self.expression( 5354 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5355 ) 5356 5357 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5358 type_name = identifier.name 5359 5360 while self._match(TokenType.DOT): 5361 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5362 5363 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5364 5365 def _parse_types( 5366 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5367 ) -> t.Optional[exp.Expression]: 5368 index = self._index 5369 5370 this: t.Optional[exp.Expression] = None 5371 prefix = self._match_text_seq("SYSUDTLIB", ".") 5372 5373 if self._match_set(self.TYPE_TOKENS): 5374 type_token = self._prev.token_type 5375 else: 5376 type_token = None 5377 identifier = allow_identifiers and self._parse_id_var( 5378 any_token=False, tokens=(TokenType.VAR,) 5379 ) 5380 if isinstance(identifier, exp.Identifier): 5381 try: 5382 tokens = self.dialect.tokenize(identifier.name) 5383 except TokenError: 5384 tokens = None 5385 5386 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5387 type_token = tokens[0].token_type 5388 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5389 this = self._parse_user_defined_type(identifier) 5390 else: 5391 self._retreat(self._index - 1) 5392 return None 5393 else: 5394 return None 5395 5396 if type_token == TokenType.PSEUDO_TYPE: 5397 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5398 5399 if type_token == TokenType.OBJECT_IDENTIFIER: 5400 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5401 5402 # https://materialize.com/docs/sql/types/map/ 5403 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5404 key_type = self._parse_types( 5405 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5406 ) 5407 if not self._match(TokenType.FARROW): 5408 self._retreat(index) 5409 return None 5410 5411 value_type = self._parse_types( 5412 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5413 ) 5414 if not self._match(TokenType.R_BRACKET): 5415 self._retreat(index) 5416 return None 5417 5418 return exp.DataType( 5419 this=exp.DataType.Type.MAP, 5420 expressions=[key_type, value_type], 5421 nested=True, 5422 prefix=prefix, 5423 ) 5424 5425 nested = type_token in self.NESTED_TYPE_TOKENS 5426 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5427 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5428 expressions = None 5429 maybe_func = False 5430 5431 if self._match(TokenType.L_PAREN): 5432 if is_struct: 5433 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5434 elif nested: 5435 expressions = self._parse_csv( 5436 lambda: self._parse_types( 5437 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5438 ) 5439 ) 5440 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5441 this = expressions[0] 5442 this.set("nullable", True) 5443 self._match_r_paren() 5444 return this 5445 elif type_token in self.ENUM_TYPE_TOKENS: 5446 expressions = self._parse_csv(self._parse_equality) 5447 elif is_aggregate: 5448 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5449 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5450 ) 5451 if not func_or_ident: 5452 return None 5453 expressions = [func_or_ident] 5454 if self._match(TokenType.COMMA): 5455 expressions.extend( 5456 self._parse_csv( 5457 lambda: self._parse_types( 5458 check_func=check_func, 5459 schema=schema, 5460 allow_identifiers=allow_identifiers, 5461 ) 5462 ) 5463 ) 5464 else: 5465 expressions = self._parse_csv(self._parse_type_size) 5466 5467 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5468 if type_token == TokenType.VECTOR and len(expressions) == 2: 5469 expressions = self._parse_vector_expressions(expressions) 5470 5471 if not self._match(TokenType.R_PAREN): 5472 self._retreat(index) 5473 return None 5474 5475 maybe_func = True 5476 5477 values: t.Optional[t.List[exp.Expression]] = None 5478 5479 if nested and self._match(TokenType.LT): 5480 if is_struct: 5481 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5482 else: 5483 expressions = self._parse_csv( 5484 lambda: self._parse_types( 5485 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5486 ) 5487 ) 5488 5489 if not self._match(TokenType.GT): 5490 self.raise_error("Expecting >") 5491 5492 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5493 values = self._parse_csv(self._parse_assignment) 5494 if not values and is_struct: 5495 values = None 5496 self._retreat(self._index - 1) 5497 else: 5498 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5499 5500 if type_token in self.TIMESTAMPS: 5501 if self._match_text_seq("WITH", "TIME", "ZONE"): 5502 maybe_func = False 5503 tz_type = ( 5504 exp.DataType.Type.TIMETZ 5505 if type_token in self.TIMES 5506 else exp.DataType.Type.TIMESTAMPTZ 5507 ) 5508 this = exp.DataType(this=tz_type, expressions=expressions) 5509 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5510 maybe_func = False 5511 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5512 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5513 maybe_func = False 5514 elif type_token == TokenType.INTERVAL: 5515 unit = self._parse_var(upper=True) 5516 if unit: 5517 if self._match_text_seq("TO"): 5518 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5519 5520 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5521 else: 5522 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5523 elif type_token == TokenType.VOID: 5524 this = exp.DataType(this=exp.DataType.Type.NULL) 5525 5526 if maybe_func and check_func: 5527 index2 = self._index 5528 peek = self._parse_string() 5529 5530 if not peek: 5531 self._retreat(index) 5532 return None 5533 5534 self._retreat(index2) 5535 5536 if not this: 5537 if self._match_text_seq("UNSIGNED"): 5538 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5539 if not unsigned_type_token: 5540 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5541 5542 type_token = unsigned_type_token or type_token 5543 5544 # NULLABLE without parentheses can be a column (Presto/Trino) 5545 if type_token == 
TokenType.NULLABLE and not expressions: 5546 self._retreat(index) 5547 return None 5548 5549 this = exp.DataType( 5550 this=exp.DataType.Type[type_token.value], 5551 expressions=expressions, 5552 nested=nested, 5553 prefix=prefix, 5554 ) 5555 5556 # Empty arrays/structs are allowed 5557 if values is not None: 5558 cls = exp.Struct if is_struct else exp.Array 5559 this = exp.cast(cls(expressions=values), this, copy=False) 5560 5561 elif expressions: 5562 this.set("expressions", expressions) 5563 5564 # https://materialize.com/docs/sql/types/list/#type-name 5565 while self._match(TokenType.LIST): 5566 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5567 5568 index = self._index 5569 5570 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5571 matched_array = self._match(TokenType.ARRAY) 5572 5573 while self._curr: 5574 datatype_token = self._prev.token_type 5575 matched_l_bracket = self._match(TokenType.L_BRACKET) 5576 5577 if (not matched_l_bracket and not matched_array) or ( 5578 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5579 ): 5580 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5581 # not to be confused with the fixed size array parsing 5582 break 5583 5584 matched_array = False 5585 values = self._parse_csv(self._parse_assignment) or None 5586 if ( 5587 values 5588 and not schema 5589 and ( 5590 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5591 ) 5592 ): 5593 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5594 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5595 self._retreat(index) 5596 break 5597 5598 this = exp.DataType( 5599 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5600 ) 5601 self._match(TokenType.R_BRACKET) 5602 5603 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5604 converter = self.TYPE_CONVERTERS.get(this.this) 5605 if converter: 5606 this = converter(t.cast(exp.DataType, this)) 5607 5608 return this 5609 5610 def _parse_vector_expressions( 5611 self, expressions: t.List[exp.Expression] 5612 ) -> t.List[exp.Expression]: 5613 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5614 5615 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5616 index = self._index 5617 5618 if ( 5619 self._curr 5620 and self._next 5621 and self._curr.token_type in self.TYPE_TOKENS 5622 and self._next.token_type in self.TYPE_TOKENS 5623 ): 5624 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5625 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5626 this = self._parse_id_var() 5627 else: 5628 this = ( 5629 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5630 or self._parse_id_var() 5631 ) 5632 5633 self._match(TokenType.COLON) 5634 5635 if ( 5636 type_required 5637 and not isinstance(this, exp.DataType) 5638 and not self._match_set(self.TYPE_TOKENS, advance=False) 5639 ): 5640 self._retreat(index) 5641 return self._parse_types() 5642 5643 return self._parse_column_def(this) 5644 5645 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5646 if not self._match_text_seq("AT", "TIME", "ZONE"): 5647 return this 5648 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5649 5650 def _parse_column(self) -> t.Optional[exp.Expression]: 5651 this = self._parse_column_reference() 5652 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5653 5654 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5655 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5656 5657 return column 5658 5659 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5660 this = self._parse_field() 5661 if ( 5662 not this 5663 and self._match(TokenType.VALUES, advance=False) 5664 and self.VALUES_FOLLOWED_BY_PAREN 5665 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5666 ): 5667 this = self._parse_id_var() 5668 5669 if isinstance(this, exp.Identifier): 5670 # We bubble up comments from the Identifier to the Column 5671 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5672 5673 return this 5674 5675 def _parse_colon_as_variant_extract( 5676 self, this: t.Optional[exp.Expression] 5677 ) -> t.Optional[exp.Expression]: 5678 casts = [] 5679 json_path = [] 5680 escape = None 5681 5682 while self._match(TokenType.COLON): 5683 start_index = self._index 5684 5685 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5686 path = self._parse_column_ops( 5687 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5688 ) 5689 5690 # The cast :: operator has a lower precedence than the extraction operator :, so 5691 # we rearrange the AST appropriately to avoid casting the JSON path 5692 while isinstance(path, exp.Cast): 5693 casts.append(path.to) 5694 path = path.this 5695 5696 if casts: 5697 dcolon_offset = next( 5698 i 5699 for i, t in enumerate(self._tokens[start_index:]) 5700 if t.token_type == TokenType.DCOLON 5701 ) 5702 end_token = self._tokens[start_index + dcolon_offset - 1] 5703 else: 5704 end_token = self._prev 5705 5706 if path: 5707 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5708 # it'll roundtrip to a string literal in GET_PATH 5709 if isinstance(path, exp.Identifier) and path.quoted: 5710 escape = True 5711 5712 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5713 5714 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5715 # Databricks transforms it back to the colon/dot notation 5716 if json_path: 5717 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5718 5719 if json_path_expr: 5720 json_path_expr.set("escape", escape) 5721 5722 this = self.expression( 5723 exp.JSONExtract, 5724 this=this, 5725 expression=json_path_expr, 5726 variant_extract=True, 5727 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5728 ) 5729 5730 while casts: 5731 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5732 5733 return this 5734 5735 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5736 return self._parse_types() 5737 5738 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5739 this = self._parse_bracket(this) 5740 5741 while self._match_set(self.COLUMN_OPERATORS): 5742 op_token = self._prev.token_type 5743 op = self.COLUMN_OPERATORS.get(op_token) 5744 5745 if op_token in self.CAST_COLUMN_OPERATORS: 5746 field = self._parse_dcolon() 5747 if not field: 5748 self.raise_error("Expected type") 5749 elif op and self._curr: 5750 field = self._parse_column_reference() or self._parse_bitwise() 5751 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5752 field = self._parse_column_ops(field) 5753 else: 5754 field = self._parse_field(any_token=True, anonymous_func=True) 5755 5756 # Function calls can be qualified, e.g., x.y.FOO() 5757 # This converts the final AST to a series of Dots leading to the function call 5758 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5759 if isinstance(field, (exp.Func, exp.Window)) and this: 5760 this = this.transform( 5761 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5762 ) 5763 5764 if op: 5765 this = op(self, this, field) 5766 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5767 this = self.expression( 5768 exp.Column, 5769 comments=this.comments, 5770 this=field, 5771 table=this.this, 5772 db=this.args.get("table"), 5773 catalog=this.args.get("db"), 5774 ) 5775 elif isinstance(field, exp.Window): 5776 # Move the exp.Dot's to the window's function 5777 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5778 field.set("this", window_func) 5779 this = field 5780 else: 5781 this = self.expression(exp.Dot, this=this, expression=field) 5782 5783 if field and field.comments: 5784 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5785 5786 this = self._parse_bracket(this) 5787 5788 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5789 5790 def _parse_paren(self) -> t.Optional[exp.Expression]: 5791 if not self._match(TokenType.L_PAREN): 5792 return None 5793 5794 comments = self._prev_comments 5795 query = self._parse_select() 5796 5797 if query: 5798 expressions = [query] 5799 else: 5800 expressions = self._parse_expressions() 5801 5802 this = seq_get(expressions, 0) 5803 5804 if not this and self._match(TokenType.R_PAREN, advance=False): 5805 this = self.expression(exp.Tuple) 5806 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5807 this = 
self._parse_subquery(this=this, parse_alias=False) 5808 elif isinstance(this, exp.Subquery): 5809 this = self._parse_subquery( 5810 this=self._parse_query_modifiers(self._parse_set_operations(this)), 5811 parse_alias=False, 5812 ) 5813 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5814 this = self.expression(exp.Tuple, expressions=expressions) 5815 else: 5816 this = self.expression(exp.Paren, this=this) 5817 5818 if this: 5819 this.add_comments(comments) 5820 5821 self._match_r_paren(expression=this) 5822 5823 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5824 return self._parse_window(this) 5825 5826 return this 5827 5828 def _parse_primary(self) -> t.Optional[exp.Expression]: 5829 if self._match_set(self.PRIMARY_PARSERS): 5830 token_type = self._prev.token_type 5831 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5832 5833 if token_type == TokenType.STRING: 5834 expressions = [primary] 5835 while self._match(TokenType.STRING): 5836 expressions.append(exp.Literal.string(self._prev.text)) 5837 5838 if len(expressions) > 1: 5839 return self.expression(exp.Concat, expressions=expressions) 5840 5841 return primary 5842 5843 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5844 return exp.Literal.number(f"0.{self._prev.text}") 5845 5846 return self._parse_paren() 5847 5848 def _parse_field( 5849 self, 5850 any_token: bool = False, 5851 tokens: t.Optional[t.Collection[TokenType]] = None, 5852 anonymous_func: bool = False, 5853 ) -> t.Optional[exp.Expression]: 5854 if anonymous_func: 5855 field = ( 5856 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5857 or self._parse_primary() 5858 ) 5859 else: 5860 field = self._parse_primary() or self._parse_function( 5861 anonymous=anonymous_func, any_token=any_token 5862 ) 5863 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5864 5865 def _parse_function( 5866 self, 5867 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5868 anonymous: bool = False, 5869 optional_parens: bool = True, 5870 any_token: bool = False, 5871 ) -> t.Optional[exp.Expression]: 5872 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5873 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5874 fn_syntax = False 5875 if ( 5876 self._match(TokenType.L_BRACE, advance=False) 5877 and self._next 5878 and self._next.text.upper() == "FN" 5879 ): 5880 self._advance(2) 5881 fn_syntax = True 5882 5883 func = self._parse_function_call( 5884 functions=functions, 5885 anonymous=anonymous, 5886 optional_parens=optional_parens, 5887 any_token=any_token, 5888 ) 5889 5890 if fn_syntax: 5891 self._match(TokenType.R_BRACE) 5892 5893 return func 5894 5895 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5896 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5897 5898 def _parse_function_call( 5899 self, 5900 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5901 anonymous: bool = False, 5902 optional_parens: bool = True, 5903 any_token: bool = False, 5904 ) -> t.Optional[exp.Expression]: 5905 if not self._curr: 5906 return None 5907 5908 comments = self._curr.comments 5909 prev = self._prev 5910 token = self._curr 5911 token_type = self._curr.token_type 5912 this = self._curr.text 5913 upper = this.upper() 5914 5915 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5916 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5917 self._advance() 5918 return 
self._parse_window(parser(self)) 5919 5920 if not self._next or self._next.token_type != TokenType.L_PAREN: 5921 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5922 self._advance() 5923 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5924 5925 return None 5926 5927 if any_token: 5928 if token_type in self.RESERVED_TOKENS: 5929 return None 5930 elif token_type not in self.FUNC_TOKENS: 5931 return None 5932 5933 self._advance(2) 5934 5935 parser = self.FUNCTION_PARSERS.get(upper) 5936 if parser and not anonymous: 5937 this = parser(self) 5938 else: 5939 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5940 5941 if subquery_predicate: 5942 expr = None 5943 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5944 expr = self._parse_select() 5945 self._match_r_paren() 5946 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5947 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5948 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5949 self._advance(-1) 5950 expr = self._parse_bitwise() 5951 5952 if expr: 5953 return self.expression(subquery_predicate, comments=comments, this=expr) 5954 5955 if functions is None: 5956 functions = self.FUNCTIONS 5957 5958 function = functions.get(upper) 5959 known_function = function and not anonymous 5960 5961 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5962 args = self._parse_function_args(alias) 5963 5964 post_func_comments = self._curr and self._curr.comments 5965 if known_function and post_func_comments: 5966 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5967 # call we'll construct it as exp.Anonymous, even if it's "known" 5968 if any( 5969 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5970 for comment in post_func_comments 5971 ): 5972 known_function = False 5973 5974 if alias and known_function: 5975 args = self._kv_to_prop_eq(args) 5976 5977 if known_function: 5978 func_builder = t.cast(t.Callable, function) 5979 5980 if "dialect" in func_builder.__code__.co_varnames: 5981 func = func_builder(args, dialect=self.dialect) 5982 else: 5983 func = func_builder(args) 5984 5985 func = self.validate_expression(func, args) 5986 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5987 func.meta["name"] = this 5988 5989 this = func 5990 else: 5991 if token_type == TokenType.IDENTIFIER: 5992 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5993 5994 this = self.expression(exp.Anonymous, this=this, expressions=args) 5995 this = this.update_positions(token) 5996 5997 if isinstance(this, exp.Expression): 5998 this.add_comments(comments) 5999 6000 self._match_r_paren(this) 6001 return self._parse_window(this) 6002 6003 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 6004 return expression 6005 6006 def _kv_to_prop_eq( 6007 self, expressions: t.List[exp.Expression], parse_map: bool = False 6008 ) -> t.List[exp.Expression]: 6009 transformed = [] 6010 6011 for index, e in enumerate(expressions): 6012 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6013 if isinstance(e, exp.Alias): 6014 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6015 6016 if not isinstance(e, exp.PropertyEQ): 6017 e = self.expression( 6018 exp.PropertyEQ, 6019 this=e.this if parse_map else exp.to_identifier(e.this.name), 6020 expression=e.expression, 6021 ) 6022 6023 if isinstance(e.this, exp.Column): 6024 e.this.replace(e.this.this) 
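# A struct key parsed as a Column is unwrapped to its underlying Identifier
# here, so keys round-trip as plain names rather than column references.
# Sketch of the resulting canonicalization, assuming a stock sqlglot install:
#
#   import sqlglot
#   from sqlglot import exp
#
#   struct = sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct)
#   assert isinstance(struct.expressions[0], exp.PropertyEQ)  # key 'a', value 1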
6025 else: 6026 e = self._to_prop_eq(e, index) 6027 6028 transformed.append(e) 6029 6030 return transformed 6031 6032 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6033 return self._parse_statement() 6034 6035 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6036 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6037 6038 def _parse_user_defined_function( 6039 self, kind: t.Optional[TokenType] = None 6040 ) -> t.Optional[exp.Expression]: 6041 this = self._parse_table_parts(schema=True) 6042 6043 if not self._match(TokenType.L_PAREN): 6044 return this 6045 6046 expressions = self._parse_csv(self._parse_function_parameter) 6047 self._match_r_paren() 6048 return self.expression( 6049 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6050 ) 6051 6052 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6053 literal = self._parse_primary() 6054 if literal: 6055 return self.expression(exp.Introducer, this=token.text, expression=literal) 6056 6057 return self._identifier_expression(token) 6058 6059 def _parse_session_parameter(self) -> exp.SessionParameter: 6060 kind = None 6061 this = self._parse_id_var() or self._parse_primary() 6062 6063 if this and self._match(TokenType.DOT): 6064 kind = this.name 6065 this = self._parse_var() or self._parse_primary() 6066 6067 return self.expression(exp.SessionParameter, this=this, kind=kind) 6068 6069 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6070 return self._parse_id_var() 6071 6072 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6073 index = self._index 6074 6075 if self._match(TokenType.L_PAREN): 6076 expressions = t.cast( 6077 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6078 ) 6079 6080 if not self._match(TokenType.R_PAREN): 6081 self._retreat(index) 6082 else: 6083 expressions = [self._parse_lambda_arg()] 6084 6085 if self._match_set(self.LAMBDAS): 6086 return self.LAMBDAS[self._prev.token_type](self, expressions) 6087 6088 self._retreat(index) 6089 6090 this: t.Optional[exp.Expression] 6091 6092 if self._match(TokenType.DISTINCT): 6093 this = self.expression( 6094 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6095 ) 6096 else: 6097 this = self._parse_select_or_expression(alias=alias) 6098 6099 return self._parse_limit( 6100 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6101 ) 6102 6103 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6104 index = self._index 6105 if not self._match(TokenType.L_PAREN): 6106 return this 6107 6108 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6109 # expr can be of both types 6110 if self._match_set(self.SELECT_START_TOKENS): 6111 self._retreat(index) 6112 return this 6113 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6114 self._match_r_paren() 6115 return self.expression(exp.Schema, this=this, expressions=args) 6116 6117 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6118 return self._parse_column_def(self._parse_field(any_token=True)) 6119 6120 def _parse_column_def( 6121 self, this: t.Optional[exp.Expression], computed_column: bool = True 6122 ) -> t.Optional[exp.Expression]: 6123 # column defs are not really columns, they're identifiers 6124 if isinstance(this, exp.Column): 6125 this = this.this 6126 6127 if not computed_column: 6128 self._match(TokenType.ALIAS) 6129 6130 kind = self._parse_types(schema=True) 6131 6132 if self._match_text_seq("FOR", "ORDINALITY"): 6133 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6134 6135 constraints: t.List[exp.Expression] = [] 6136 6137 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6138 ("ALIAS", "MATERIALIZED") 6139 ): 6140 persisted = self._prev.text.upper() == "MATERIALIZED" 6141 constraint_kind = exp.ComputedColumnConstraint( 6142 this=self._parse_assignment(), 6143 persisted=persisted or self._match_text_seq("PERSISTED"), 6144 data_type=exp.Var(this="AUTO") 6145 if self._match_text_seq("AUTO") 6146 else self._parse_types(), 6147 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6148 ) 6149 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6150 elif ( 6151 kind 6152 and self._match(TokenType.ALIAS, advance=False) 6153 and ( 6154 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6155 or (self._next and self._next.token_type == TokenType.L_PAREN) 6156 ) 6157 ): 6158 self._advance() 6159 constraints.append( 6160 self.expression( 6161 exp.ColumnConstraint, 6162 kind=exp.ComputedColumnConstraint( 6163 this=self._parse_disjunction(), 6164 persisted=self._match_texts(("STORED", "VIRTUAL")) 6165 and self._prev.text.upper() == "STORED", 6166 ), 6167 ) 6168 ) 6169 6170 while True: 6171 constraint = self._parse_column_constraint() 6172 if not constraint: 6173 break 6174 constraints.append(constraint) 6175 6176 if not kind and not constraints: 6177 return this 6178 6179 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6180 6181 def _parse_auto_increment( 6182 self, 6183 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6184 start = None 6185 increment = None 6186 order = None 6187 6188 if self._match(TokenType.L_PAREN, advance=False): 6189 args = self._parse_wrapped_csv(self._parse_bitwise) 6190 start = seq_get(args, 0) 6191 increment = seq_get(args, 1) 6192 elif self._match_text_seq("START"): 6193 start = self._parse_bitwise() 6194 self._match_text_seq("INCREMENT") 6195 increment = self._parse_bitwise() 6196 if self._match_text_seq("ORDER"): 6197 order = True 6198 elif self._match_text_seq("NOORDER"): 6199 order = False 6200 6201 if start and increment: 6202 return exp.GeneratedAsIdentityColumnConstraint( 6203 start=start, increment=increment, this=False, order=order 6204 ) 6205 6206 return exp.AutoIncrementColumnConstraint() 6207 6208 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6209 if not self._match_text_seq("REFRESH"): 6210 self._retreat(self._index - 1) 6211 return None 6212 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6213 6214 def _parse_compress(self) -> exp.CompressColumnConstraint: 6215 if self._match(TokenType.L_PAREN, advance=False): 6216 return self.expression( 6217 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6218 ) 6219 6220 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6221 6222 def _parse_generated_as_identity( 6223 self, 6224 ) -> ( 6225 exp.GeneratedAsIdentityColumnConstraint 6226 | exp.ComputedColumnConstraint 6227 | exp.GeneratedAsRowColumnConstraint 6228 ): 6229 if self._match_text_seq("BY", "DEFAULT"): 6230 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6231 this = self.expression( 6232 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6233 ) 6234 else: 6235 self._match_text_seq("ALWAYS") 6236 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6237 6238 self._match(TokenType.ALIAS) 6239 6240 if self._match_text_seq("ROW"): 6241 start = self._match_text_seq("START") 6242 if not start: 6243 self._match(TokenType.END) 6244 hidden = self._match_text_seq("HIDDEN") 6245 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6246 6247 identity = self._match_text_seq("IDENTITY") 6248 6249 if self._match(TokenType.L_PAREN): 6250 if self._match(TokenType.START_WITH): 6251 this.set("start", self._parse_bitwise()) 6252 if self._match_text_seq("INCREMENT", "BY"): 6253 this.set("increment", self._parse_bitwise()) 6254 if self._match_text_seq("MINVALUE"): 6255 this.set("minvalue", self._parse_bitwise()) 6256 if self._match_text_seq("MAXVALUE"): 6257 this.set("maxvalue", self._parse_bitwise()) 6258 6259 if self._match_text_seq("CYCLE"): 6260 this.set("cycle", True) 6261 elif self._match_text_seq("NO", "CYCLE"): 6262 this.set("cycle", False) 6263 6264 if not identity: 6265 this.set("expression", self._parse_range()) 6266 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6267 args = self._parse_csv(self._parse_bitwise) 6268 this.set("start", seq_get(args, 0)) 6269 this.set("increment", seq_get(args, 1)) 6270 6271 self._match_r_paren() 6272 6273 return this 6274 6275 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6276 self._match_text_seq("LENGTH") 6277 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6278 6279 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6280 if self._match_text_seq("NULL"): 6281 return self.expression(exp.NotNullColumnConstraint) 6282 if self._match_text_seq("CASESPECIFIC"): 6283 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6284 if self._match_text_seq("FOR", "REPLICATION"): 6285 return self.expression(exp.NotForReplicationColumnConstraint) 6286 6287 # Unconsume the `NOT` token 6288 self._retreat(self._index - 1) 6289 return None 6290 6291 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6292 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6293 6294 procedure_option_follows = ( 6295 self._match(TokenType.WITH, advance=False) 6296 and self._next 6297 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6298 ) 6299 6300 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6301 return self.expression( 6302 exp.ColumnConstraint, 6303 this=this, 6304 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6305 ) 6306 6307 return this 6308 6309 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6310 if not 
self._match(TokenType.CONSTRAINT): 6311 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6312 6313 return self.expression( 6314 exp.Constraint, 6315 this=self._parse_id_var(), 6316 expressions=self._parse_unnamed_constraints(), 6317 ) 6318 6319 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6320 constraints = [] 6321 while True: 6322 constraint = self._parse_unnamed_constraint() or self._parse_function() 6323 if not constraint: 6324 break 6325 constraints.append(constraint) 6326 6327 return constraints 6328 6329 def _parse_unnamed_constraint( 6330 self, constraints: t.Optional[t.Collection[str]] = None 6331 ) -> t.Optional[exp.Expression]: 6332 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6333 constraints or self.CONSTRAINT_PARSERS 6334 ): 6335 return None 6336 6337 constraint = self._prev.text.upper() 6338 if constraint not in self.CONSTRAINT_PARSERS: 6339 self.raise_error(f"No parser found for schema constraint {constraint}.") 6340 6341 return self.CONSTRAINT_PARSERS[constraint](self) 6342 6343 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6344 return self._parse_id_var(any_token=False) 6345 6346 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6347 self._match_texts(("KEY", "INDEX")) 6348 return self.expression( 6349 exp.UniqueColumnConstraint, 6350 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6351 this=self._parse_schema(self._parse_unique_key()), 6352 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6353 on_conflict=self._parse_on_conflict(), 6354 options=self._parse_key_constraint_options(), 6355 ) 6356 6357 def _parse_key_constraint_options(self) -> t.List[str]: 6358 options = [] 6359 while True: 6360 if not self._curr: 6361 break 6362 6363 if self._match(TokenType.ON): 6364 action = None 6365 on = self._advance_any() and self._prev.text 6366 6367 if self._match_text_seq("NO", "ACTION"): 6368 action = "NO ACTION" 6369 elif self._match_text_seq("CASCADE"): 6370 action = "CASCADE" 6371 elif self._match_text_seq("RESTRICT"): 6372 action = "RESTRICT" 6373 elif self._match_pair(TokenType.SET, TokenType.NULL): 6374 action = "SET NULL" 6375 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6376 action = "SET DEFAULT" 6377 else: 6378 self.raise_error("Invalid key constraint") 6379 6380 options.append(f"ON {on} {action}") 6381 else: 6382 var = self._parse_var_from_options( 6383 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6384 ) 6385 if not var: 6386 break 6387 options.append(var.name) 6388 6389 return options 6390 6391 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6392 if match and not self._match(TokenType.REFERENCES): 6393 return None 6394 6395 expressions = None 6396 this = self._parse_table(schema=True) 6397 options = self._parse_key_constraint_options() 6398 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6399 6400 def _parse_foreign_key(self) -> exp.ForeignKey: 6401 expressions = ( 6402 self._parse_wrapped_id_vars() 6403 if not self._match(TokenType.REFERENCES, advance=False) 6404 else None 6405 ) 6406 reference = self._parse_references() 6407 on_options = {} 6408 6409 while self._match(TokenType.ON): 6410 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6411 self.raise_error("Expected DELETE or UPDATE") 6412 6413 kind = self._prev.text.lower() 6414 6415 if self._match_text_seq("NO", "ACTION"): 6416 action = "NO ACTION" 6417 elif 
self._match(TokenType.SET): 6418 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6419 action = "SET " + self._prev.text.upper() 6420 else: 6421 self._advance() 6422 action = self._prev.text.upper() 6423 6424 on_options[kind] = action 6425 6426 return self.expression( 6427 exp.ForeignKey, 6428 expressions=expressions, 6429 reference=reference, 6430 options=self._parse_key_constraint_options(), 6431 **on_options, # type: ignore 6432 ) 6433 6434 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6435 return self._parse_ordered() or self._parse_field() 6436 6437 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6438 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6439 self._retreat(self._index - 1) 6440 return None 6441 6442 id_vars = self._parse_wrapped_id_vars() 6443 return self.expression( 6444 exp.PeriodForSystemTimeConstraint, 6445 this=seq_get(id_vars, 0), 6446 expression=seq_get(id_vars, 1), 6447 ) 6448 6449 def _parse_primary_key( 6450 self, wrapped_optional: bool = False, in_props: bool = False 6451 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6452 desc = ( 6453 self._match_set((TokenType.ASC, TokenType.DESC)) 6454 and self._prev.token_type == TokenType.DESC 6455 ) 6456 6457 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6458 return self.expression( 6459 exp.PrimaryKeyColumnConstraint, 6460 desc=desc, 6461 options=self._parse_key_constraint_options(), 6462 ) 6463 6464 expressions = self._parse_wrapped_csv( 6465 self._parse_primary_key_part, optional=wrapped_optional 6466 ) 6467 6468 return self.expression( 6469 exp.PrimaryKey, 6470 expressions=expressions, 6471 include=self._parse_index_params(), 6472 options=self._parse_key_constraint_options(), 6473 ) 6474 6475 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6476 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6477 6478 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6479 """ 6480 Parses a datetime column in ODBC format. We parse the column into the corresponding 6481 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6482 same as we did for `DATE('yyyy-mm-dd')`. 
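Illustrative example (a sketch, assuming a stock sqlglot install), showing
the node produced for the `d` literal:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> sqlglot.parse_one("SELECT {d '2024-01-01'}").find(exp.Date) is not None
    True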
6483 6484 Reference: 6485 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6486 """ 6487 self._match(TokenType.VAR) 6488 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6489 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6490 if not self._match(TokenType.R_BRACE): 6491 self.raise_error("Expected }") 6492 return expression 6493 6494 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6495 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6496 return this 6497 6498 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6499 map_token = seq_get(self._tokens, self._index - 2) 6500 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6501 else: 6502 parse_map = False 6503 6504 bracket_kind = self._prev.token_type 6505 if ( 6506 bracket_kind == TokenType.L_BRACE 6507 and self._curr 6508 and self._curr.token_type == TokenType.VAR 6509 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6510 ): 6511 return self._parse_odbc_datetime_literal() 6512 6513 expressions = self._parse_csv( 6514 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6515 ) 6516 6517 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6518 self.raise_error("Expected ]") 6519 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6520 self.raise_error("Expected }") 6521 6522 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6523 if bracket_kind == TokenType.L_BRACE: 6524 this = self.expression( 6525 exp.Struct, 6526 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6527 ) 6528 elif not this: 6529 this = build_array_constructor( 6530 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6531 ) 6532 else: 6533 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6534 if constructor_type: 6535 return build_array_constructor( 6536 constructor_type, 6537 args=expressions, 6538 bracket_kind=bracket_kind, 6539 dialect=self.dialect, 6540 ) 6541 6542 expressions = apply_index_offset( 6543 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6544 ) 6545 this = self.expression( 6546 exp.Bracket, 6547 this=this, 6548 expressions=expressions, 6549 comments=this.pop_comments(), 6550 ) 6551 6552 self._add_comments(this) 6553 return self._parse_bracket(this) 6554 6555 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6556 if self._match(TokenType.COLON): 6557 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6558 return this 6559 6560 def _parse_case(self) -> t.Optional[exp.Expression]: 6561 if self._match(TokenType.DOT, advance=False): 6562 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6563 self._retreat(self._index - 1) 6564 return None 6565 6566 ifs = [] 6567 default = None 6568 6569 comments = self._prev_comments 6570 expression = self._parse_assignment() 6571 6572 while self._match(TokenType.WHEN): 6573 this = self._parse_assignment() 6574 self._match(TokenType.THEN) 6575 then = self._parse_assignment() 6576 ifs.append(self.expression(exp.If, this=this, true=then)) 6577 6578 if self._match(TokenType.ELSE): 6579 default = self._parse_assignment() 6580 6581 if not self._match(TokenType.END): 6582 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6583 default 
= exp.column("interval") 6584 else: 6585 self.raise_error("Expected END after CASE", self._prev) 6586 6587 return self.expression( 6588 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6589 ) 6590 6591 def _parse_if(self) -> t.Optional[exp.Expression]: 6592 if self._match(TokenType.L_PAREN): 6593 args = self._parse_csv( 6594 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6595 ) 6596 this = self.validate_expression(exp.If.from_arg_list(args), args) 6597 self._match_r_paren() 6598 else: 6599 index = self._index - 1 6600 6601 if self.NO_PAREN_IF_COMMANDS and index == 0: 6602 return self._parse_as_command(self._prev) 6603 6604 condition = self._parse_assignment() 6605 6606 if not condition: 6607 self._retreat(index) 6608 return None 6609 6610 self._match(TokenType.THEN) 6611 true = self._parse_assignment() 6612 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6613 self._match(TokenType.END) 6614 this = self.expression(exp.If, this=condition, true=true, false=false) 6615 6616 return this 6617 6618 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6619 if not self._match_text_seq("VALUE", "FOR"): 6620 self._retreat(self._index - 1) 6621 return None 6622 6623 return self.expression( 6624 exp.NextValueFor, 6625 this=self._parse_column(), 6626 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6627 ) 6628 6629 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6630 this = self._parse_function() or self._parse_var_or_string(upper=True) 6631 6632 if self._match(TokenType.FROM): 6633 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6634 6635 if not self._match(TokenType.COMMA): 6636 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6637 6638 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6639 6640 def _parse_gap_fill(self) -> exp.GapFill: 6641 self._match(TokenType.TABLE) 6642 this = self._parse_table() 6643 6644 self._match(TokenType.COMMA) 6645 args = [this, *self._parse_csv(self._parse_lambda)] 6646 6647 gap_fill = exp.GapFill.from_arg_list(args) 6648 return self.validate_expression(gap_fill, args) 6649 6650 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6651 this = self._parse_assignment() 6652 6653 if not self._match(TokenType.ALIAS): 6654 if self._match(TokenType.COMMA): 6655 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6656 6657 self.raise_error("Expected AS after CAST") 6658 6659 fmt = None 6660 to = self._parse_types() 6661 6662 default = self._match(TokenType.DEFAULT) 6663 if default: 6664 default = self._parse_bitwise() 6665 self._match_text_seq("ON", "CONVERSION", "ERROR") 6666 6667 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6668 fmt_string = self._parse_string() 6669 fmt = self._parse_at_time_zone(fmt_string) 6670 6671 if not to: 6672 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6673 if to.this in exp.DataType.TEMPORAL_TYPES: 6674 this = self.expression( 6675 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6676 this=this, 6677 format=exp.Literal.string( 6678 format_time( 6679 fmt_string.this if fmt_string else "", 6680 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6681 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6682 ) 6683 ), 6684 safe=safe, 6685 ) 6686 6687 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6688 this.set("zone", 
fmt.args["zone"]) 6689 return this 6690 elif not to: 6691 self.raise_error("Expected TYPE after CAST") 6692 elif isinstance(to, exp.Identifier): 6693 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6694 elif to.this == exp.DataType.Type.CHAR: 6695 if self._match(TokenType.CHARACTER_SET): 6696 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6697 6698 return self.build_cast( 6699 strict=strict, 6700 this=this, 6701 to=to, 6702 format=fmt, 6703 safe=safe, 6704 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6705 default=default, 6706 ) 6707 6708 def _parse_string_agg(self) -> exp.GroupConcat: 6709 if self._match(TokenType.DISTINCT): 6710 args: t.List[t.Optional[exp.Expression]] = [ 6711 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6712 ] 6713 if self._match(TokenType.COMMA): 6714 args.extend(self._parse_csv(self._parse_assignment)) 6715 else: 6716 args = self._parse_csv(self._parse_assignment) # type: ignore 6717 6718 if self._match_text_seq("ON", "OVERFLOW"): 6719 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6720 if self._match_text_seq("ERROR"): 6721 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6722 else: 6723 self._match_text_seq("TRUNCATE") 6724 on_overflow = self.expression( 6725 exp.OverflowTruncateBehavior, 6726 this=self._parse_string(), 6727 with_count=( 6728 self._match_text_seq("WITH", "COUNT") 6729 or not self._match_text_seq("WITHOUT", "COUNT") 6730 ), 6731 ) 6732 else: 6733 on_overflow = None 6734 6735 index = self._index 6736 if not self._match(TokenType.R_PAREN) and args: 6737 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6738 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6739 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6740 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6741 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6742 6743 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6744 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6745 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
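# For example (a sketch, assuming a stock sqlglot install), this lets a
# Postgres STRING_AGG with an inline ORDER BY transpile towards MySQL's
# GROUP_CONCAT:
#
#   import sqlglot
#
#   sqlglot.transpile(
#       "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t",
#       read="postgres",
#       write="mysql",
#   )
#   # expected output along the lines of:
#   # ["SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t"]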
6746 if not self._match_text_seq("WITHIN", "GROUP"): 6747 self._retreat(index) 6748 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6749 6750 # The corresponding match_r_paren will be called in parse_function (caller) 6751 self._match_l_paren() 6752 6753 return self.expression( 6754 exp.GroupConcat, 6755 this=self._parse_order(this=seq_get(args, 0)), 6756 separator=seq_get(args, 1), 6757 on_overflow=on_overflow, 6758 ) 6759 6760 def _parse_convert( 6761 self, strict: bool, safe: t.Optional[bool] = None 6762 ) -> t.Optional[exp.Expression]: 6763 this = self._parse_bitwise() 6764 6765 if self._match(TokenType.USING): 6766 to: t.Optional[exp.Expression] = self.expression( 6767 exp.CharacterSet, this=self._parse_var() 6768 ) 6769 elif self._match(TokenType.COMMA): 6770 to = self._parse_types() 6771 else: 6772 to = None 6773 6774 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6775 6776 def _parse_xml_table(self) -> exp.XMLTable: 6777 namespaces = None 6778 passing = None 6779 columns = None 6780 6781 if self._match_text_seq("XMLNAMESPACES", "("): 6782 namespaces = self._parse_xml_namespace() 6783 self._match_text_seq(")", ",") 6784 6785 this = self._parse_string() 6786 6787 if self._match_text_seq("PASSING"): 6788 # The BY VALUE keywords are optional and are provided for semantic clarity 6789 self._match_text_seq("BY", "VALUE") 6790 passing = self._parse_csv(self._parse_column) 6791 6792 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6793 6794 if self._match_text_seq("COLUMNS"): 6795 columns = self._parse_csv(self._parse_field_def) 6796 6797 return self.expression( 6798 exp.XMLTable, 6799 this=this, 6800 namespaces=namespaces, 6801 passing=passing, 6802 columns=columns, 6803 by_ref=by_ref, 6804 ) 6805 6806 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6807 namespaces = [] 6808 6809 while True: 6810 if self._match(TokenType.DEFAULT): 6811 uri = self._parse_string() 6812 else: 6813 uri = self._parse_alias(self._parse_string()) 6814 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6815 if not self._match(TokenType.COMMA): 6816 break 6817 6818 return namespaces 6819 6820 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6821 args = self._parse_csv(self._parse_assignment) 6822 6823 if len(args) < 3: 6824 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6825 6826 return self.expression(exp.DecodeCase, expressions=args) 6827 6828 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6829 self._match_text_seq("KEY") 6830 key = self._parse_column() 6831 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6832 self._match_text_seq("VALUE") 6833 value = self._parse_bitwise() 6834 6835 if not key and not value: 6836 return None 6837 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6838 6839 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6840 if not this or not self._match_text_seq("FORMAT", "JSON"): 6841 return this 6842 6843 return self.expression(exp.FormatJson, this=this) 6844 6845 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6846 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6847 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6848 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6849 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6850 else: 6851 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6852 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6853 6854 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6855 6856 if not empty and not error and not null: 6857 return None 6858 6859 return self.expression( 6860 exp.OnCondition, 6861 empty=empty, 6862 error=error, 6863 null=null, 6864 ) 6865 6866 def _parse_on_handling( 6867 self, on: str, *values: str 6868 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6869 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6870 for value in values: 6871 if self._match_text_seq(value, "ON", on): 6872 return f"{value} ON {on}" 6873 6874 index = self._index 6875 if self._match(TokenType.DEFAULT): 6876 default_value = self._parse_bitwise() 6877 if self._match_text_seq("ON", on): 6878 return default_value 6879 6880 self._retreat(index) 6881 6882 return None 6883 6884 @t.overload 6885 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6886 6887 @t.overload 6888 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6889 6890 def _parse_json_object(self, agg=False): 6891 star = self._parse_star() 6892 expressions = ( 6893 [star] 6894 if star 6895 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6896 ) 6897 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6898 6899 unique_keys = None 6900 if self._match_text_seq("WITH", "UNIQUE"): 6901 unique_keys = True 6902 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6903 unique_keys = False 6904 6905 self._match_text_seq("KEYS") 6906 6907 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6908 self._parse_type() 6909 ) 6910 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6911 6912 return self.expression( 6913 exp.JSONObjectAgg if agg else exp.JSONObject, 6914 expressions=expressions, 6915 null_handling=null_handling, 6916 unique_keys=unique_keys, 6917 return_type=return_type, 6918 encoding=encoding, 6919 ) 6920 6921 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6922 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6923 if not self._match_text_seq("NESTED"): 6924 this = self._parse_id_var() 6925 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 6926 kind = self._parse_types(allow_identifiers=False) 6927 nested = None 6928 else: 6929 this = None 6930 ordinality = None 6931 kind = None 6932 nested = True 6933 6934 path = self._match_text_seq("PATH") and self._parse_string() 6935 nested_schema = nested and self._parse_json_schema() 6936 6937 return self.expression( 6938 exp.JSONColumnDef, 6939 this=this, 6940 kind=kind, 6941 path=path, 6942 nested_schema=nested_schema, 6943 ordinality=ordinality, 6944 ) 6945 6946 def _parse_json_schema(self) -> exp.JSONSchema: 6947 self._match_text_seq("COLUMNS") 6948 return self.expression( 6949 exp.JSONSchema, 6950 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6951 ) 6952 6953 def _parse_json_table(self) -> exp.JSONTable: 6954 this = self._parse_format_json(self._parse_bitwise()) 6955 path = self._match(TokenType.COMMA) and self._parse_string() 6956 error_handling = 
self._parse_on_handling("ERROR", "ERROR", "NULL") 6957 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6958 schema = self._parse_json_schema() 6959 6960 return exp.JSONTable( 6961 this=this, 6962 schema=schema, 6963 path=path, 6964 error_handling=error_handling, 6965 empty_handling=empty_handling, 6966 ) 6967 6968 def _parse_match_against(self) -> exp.MatchAgainst: 6969 if self._match_text_seq("TABLE"): 6970 # parse SingleStore MATCH(TABLE ...) syntax 6971 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6972 expressions = [] 6973 table = self._parse_table() 6974 if table: 6975 expressions = [table] 6976 else: 6977 expressions = self._parse_csv(self._parse_column) 6978 6979 self._match_text_seq(")", "AGAINST", "(") 6980 6981 this = self._parse_string() 6982 6983 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6984 modifier = "IN NATURAL LANGUAGE MODE" 6985 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6986 modifier = f"{modifier} WITH QUERY EXPANSION" 6987 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6988 modifier = "IN BOOLEAN MODE" 6989 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6990 modifier = "WITH QUERY EXPANSION" 6991 else: 6992 modifier = None 6993 6994 return self.expression( 6995 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6996 ) 6997 6998 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6999 def _parse_open_json(self) -> exp.OpenJSON: 7000 this = self._parse_bitwise() 7001 path = self._match(TokenType.COMMA) and self._parse_string() 7002 7003 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7004 this = self._parse_field(any_token=True) 7005 kind = self._parse_types() 7006 path = self._parse_string() 7007 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7008 7009 return self.expression( 7010 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 7011 ) 7012 7013 expressions = None 7014 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7015 self._match_l_paren() 7016 expressions = self._parse_csv(_parse_open_json_column_def) 7017 7018 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7019 7020 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7021 args = self._parse_csv(self._parse_bitwise) 7022 7023 if self._match(TokenType.IN): 7024 return self.expression( 7025 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7026 ) 7027 7028 if haystack_first: 7029 haystack = seq_get(args, 0) 7030 needle = seq_get(args, 1) 7031 else: 7032 haystack = seq_get(args, 1) 7033 needle = seq_get(args, 0) 7034 7035 return self.expression( 7036 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7037 ) 7038 7039 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7040 args = self._parse_csv(self._parse_table) 7041 return exp.JoinHint(this=func_name.upper(), expressions=args) 7042 7043 def _parse_substring(self) -> exp.Substring: 7044 # Postgres supports the form: substring(string [from int] [for int]) 7045 # (despite being undocumented, the reverse order also works) 7046 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7047 7048 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7049 7050 start, length = None, None 7051 7052 while self._curr: 7053 if self._match(TokenType.FROM): 7054 start = 
self._parse_bitwise() 7055 elif self._match(TokenType.FOR): 7056 if not start: 7057 start = exp.Literal.number(1) 7058 length = self._parse_bitwise() 7059 else: 7060 break 7061 7062 if start: 7063 args.append(start) 7064 if length: 7065 args.append(length) 7066 7067 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7068 7069 def _parse_trim(self) -> exp.Trim: 7070 # https://www.w3resource.com/sql/character-functions/trim.php 7071 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7072 7073 position = None 7074 collation = None 7075 expression = None 7076 7077 if self._match_texts(self.TRIM_TYPES): 7078 position = self._prev.text.upper() 7079 7080 this = self._parse_bitwise() 7081 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7082 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7083 expression = self._parse_bitwise() 7084 7085 if invert_order: 7086 this, expression = expression, this 7087 7088 if self._match(TokenType.COLLATE): 7089 collation = self._parse_bitwise() 7090 7091 return self.expression( 7092 exp.Trim, this=this, position=position, expression=expression, collation=collation 7093 ) 7094 7095 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7096 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7097 7098 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7099 return self._parse_window(self._parse_id_var(), alias=True) 7100 7101 def _parse_respect_or_ignore_nulls( 7102 self, this: t.Optional[exp.Expression] 7103 ) -> t.Optional[exp.Expression]: 7104 if self._match_text_seq("IGNORE", "NULLS"): 7105 return self.expression(exp.IgnoreNulls, this=this) 7106 if self._match_text_seq("RESPECT", "NULLS"): 7107 return self.expression(exp.RespectNulls, this=this) 7108 return this 7109 7110 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7111 if self._match(TokenType.HAVING): 7112 self._match_texts(("MAX", "MIN")) 7113 max = self._prev.text.upper() != "MIN" 7114 return self.expression( 7115 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7116 ) 7117 7118 return this 7119 7120 def _parse_window( 7121 self, this: t.Optional[exp.Expression], alias: bool = False 7122 ) -> t.Optional[exp.Expression]: 7123 func = this 7124 comments = func.comments if isinstance(func, exp.Expression) else None 7125 7126 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7127 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7128 if self._match_text_seq("WITHIN", "GROUP"): 7129 order = self._parse_wrapped(self._parse_order) 7130 this = self.expression(exp.WithinGroup, this=this, expression=order) 7131 7132 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7133 self._match(TokenType.WHERE) 7134 this = self.expression( 7135 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7136 ) 7137 self._match_r_paren() 7138 7139 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7140 # Some dialects choose to implement it and some do not. 7141 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7142 7143 # There is some code above in _parse_lambda that handles 7144 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7145 7146 # The changes below handle 7147 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
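# For instance (an illustrative sketch using the public API), both spellings are
# expected to normalize to the same tree, with exp.IgnoreNulls wrapping the
# function inside the exp.Window:
#
#     import sqlglot
#     a = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
#     b = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
#     assert a == b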
7148 7149 # Oracle allows both formats 7150 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7151 # and Snowflake chose to do the same for familiarity 7152 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7153 if isinstance(this, exp.AggFunc): 7154 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7155 7156 if ignore_respect and ignore_respect is not this: 7157 ignore_respect.replace(ignore_respect.this) 7158 this = self.expression(ignore_respect.__class__, this=this) 7159 7160 this = self._parse_respect_or_ignore_nulls(this) 7161 7162 # bigquery select from window x AS (partition by ...) 7163 if alias: 7164 over = None 7165 self._match(TokenType.ALIAS) 7166 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7167 return this 7168 else: 7169 over = self._prev.text.upper() 7170 7171 if comments and isinstance(func, exp.Expression): 7172 func.pop_comments() 7173 7174 if not self._match(TokenType.L_PAREN): 7175 return self.expression( 7176 exp.Window, 7177 comments=comments, 7178 this=this, 7179 alias=self._parse_id_var(False), 7180 over=over, 7181 ) 7182 7183 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7184 7185 first = self._match(TokenType.FIRST) 7186 if self._match_text_seq("LAST"): 7187 first = False 7188 7189 partition, order = self._parse_partition_and_order() 7190 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7191 7192 if kind: 7193 self._match(TokenType.BETWEEN) 7194 start = self._parse_window_spec() 7195 7196 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7197 exclude = ( 7198 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7199 if self._match_text_seq("EXCLUDE") 7200 else None 7201 ) 7202 7203 spec = self.expression( 7204 exp.WindowSpec, 7205 kind=kind, 7206 start=start["value"], 7207 start_side=start["side"], 7208 end=end.get("value"), 7209 end_side=end.get("side"), 7210 exclude=exclude, 7211 ) 7212 else: 7213 spec = None 7214 7215 self._match_r_paren() 7216 7217 window = self.expression( 7218 exp.Window, 7219 comments=comments, 7220 this=this, 7221 partition_by=partition, 7222 order=order, 7223 spec=spec, 7224 alias=window_alias, 7225 over=over, 7226 first=first, 7227 ) 7228 7229 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
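# e.g. in Oracle (illustrative):
#     MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z)
# the KEEP (...) part is parsed into one exp.Window above, and the recursive call
# below wraps it in a second exp.Window for the trailing OVER (...).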
7230 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7231 return self._parse_window(window, alias=alias) 7232 7233 return window 7234 7235 def _parse_partition_and_order( 7236 self, 7237 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7238 return self._parse_partition_by(), self._parse_order() 7239 7240 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7241 self._match(TokenType.BETWEEN) 7242 7243 return { 7244 "value": ( 7245 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7246 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7247 or self._parse_type() 7248 ), 7249 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7250 } 7251 7252 def _parse_alias( 7253 self, this: t.Optional[exp.Expression], explicit: bool = False 7254 ) -> t.Optional[exp.Expression]: 7255 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7256 # so this section tries to parse the clause version and if it fails, it treats the token 7257 # as an identifier (alias) 7258 if self._can_parse_limit_or_offset(): 7259 return this 7260 7261 any_token = self._match(TokenType.ALIAS) 7262 comments = self._prev_comments or [] 7263 7264 if explicit and not any_token: 7265 return this 7266 7267 if self._match(TokenType.L_PAREN): 7268 aliases = self.expression( 7269 exp.Aliases, 7270 comments=comments, 7271 this=this, 7272 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7273 ) 7274 self._match_r_paren(aliases) 7275 return aliases 7276 7277 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7278 self.STRING_ALIASES and self._parse_string_as_identifier() 7279 ) 7280 7281 if alias: 7282 comments.extend(alias.pop_comments()) 7283 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7284 column = this.this 7285 7286 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7287 if not this.comments and column and column.comments: 7288 this.comments = column.pop_comments() 7289 7290 return this 7291 7292 def _parse_id_var( 7293 self, 7294 any_token: bool = True, 7295 tokens: t.Optional[t.Collection[TokenType]] = None, 7296 ) -> t.Optional[exp.Expression]: 7297 expression = self._parse_identifier() 7298 if not expression and ( 7299 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7300 ): 7301 quoted = self._prev.token_type == TokenType.STRING 7302 expression = self._identifier_expression(quoted=quoted) 7303 7304 return expression 7305 7306 def _parse_string(self) -> t.Optional[exp.Expression]: 7307 if self._match_set(self.STRING_PARSERS): 7308 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7309 return self._parse_placeholder() 7310 7311 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7312 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7313 if output: 7314 output.update_positions(self._prev) 7315 return output 7316 7317 def _parse_number(self) -> t.Optional[exp.Expression]: 7318 if self._match_set(self.NUMERIC_PARSERS): 7319 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7320 return self._parse_placeholder() 7321 7322 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7323 if self._match(TokenType.IDENTIFIER): 7324 return self._identifier_expression(quoted=True) 7325 return self._parse_placeholder() 7326 7327 def _parse_var( 7328 self, 7329 any_token: bool = False, 7330 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7331 upper: bool = False, 7332 ) -> t.Optional[exp.Expression]: 7333 if ( 7334 (any_token and self._advance_any()) 7335 or self._match(TokenType.VAR) 7336 or (self._match_set(tokens) if tokens else False) 7337 ): 7338 return self.expression( 7339 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7340 ) 7341 return self._parse_placeholder() 7342 7343 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7344 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7345 self._advance() 7346 return self._prev 7347 return None 7348 7349 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7350 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7351 7352 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7353 return self._parse_primary() or self._parse_var(any_token=True) 7354 7355 def _parse_null(self) -> t.Optional[exp.Expression]: 7356 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7357 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7358 return self._parse_placeholder() 7359 7360 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7361 if self._match(TokenType.TRUE): 7362 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7363 if self._match(TokenType.FALSE): 7364 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7365 return self._parse_placeholder() 7366 7367 def _parse_star(self) -> t.Optional[exp.Expression]: 7368 if self._match(TokenType.STAR): 7369 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7370 return self._parse_placeholder() 7371 7372 def _parse_parameter(self) -> exp.Parameter: 7373 this = self._parse_identifier() or self._parse_primary_or_var() 7374 return self.expression(exp.Parameter, this=this) 7375 7376 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7377 if self._match_set(self.PLACEHOLDER_PARSERS): 7378 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7379 if placeholder: 7380 return placeholder 7381 self._advance(-1) 7382 return None 7383 7384 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7385 if not self._match_texts(keywords): 7386 return None 7387 if self._match(TokenType.L_PAREN, advance=False): 7388 return self._parse_wrapped_csv(self._parse_expression) 7389 7390 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7391 return [expression] if expression else None 7392 7393 def _parse_csv( 7394 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7395 ) -> t.List[exp.Expression]: 7396 parse_result = parse_method() 7397 items = [parse_result] if parse_result is not None else [] 7398 7399 while self._match(sep): 7400 self._add_comments(parse_result) 7401 parse_result = parse_method() 7402 if parse_result is not None: 7403 items.append(parse_result) 7404 7405 return items 7406 7407 def _parse_tokens( 7408 self, parse_method: t.Callable, expressions: t.Dict 7409 ) -> t.Optional[exp.Expression]: 7410 this = parse_method() 7411 7412 while self._match_set(expressions): 7413 this = self.expression( 7414 expressions[self._prev.token_type], 7415 this=this, 7416 comments=self._prev_comments, 7417 expression=parse_method(), 7418 ) 7419 7420 return this 7421 7422 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7423 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7424 7425 def 
_parse_wrapped_csv( 7426 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7427 ) -> t.List[exp.Expression]: 7428 return self._parse_wrapped( 7429 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7430 ) 7431 7432 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7433 wrapped = self._match(TokenType.L_PAREN) 7434 if not wrapped and not optional: 7435 self.raise_error("Expecting (") 7436 parse_result = parse_method() 7437 if wrapped: 7438 self._match_r_paren() 7439 return parse_result 7440 7441 def _parse_expressions(self) -> t.List[exp.Expression]: 7442 return self._parse_csv(self._parse_expression) 7443 7444 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7445 return ( 7446 self._parse_set_operations( 7447 self._parse_alias(self._parse_assignment(), explicit=True) 7448 if alias 7449 else self._parse_assignment() 7450 ) 7451 or self._parse_select() 7452 ) 7453 7454 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7455 return self._parse_query_modifiers( 7456 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7457 ) 7458 7459 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7460 this = None 7461 if self._match_texts(self.TRANSACTION_KIND): 7462 this = self._prev.text 7463 7464 self._match_texts(("TRANSACTION", "WORK")) 7465 7466 modes = [] 7467 while True: 7468 mode = [] 7469 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7470 mode.append(self._prev.text) 7471 7472 if mode: 7473 modes.append(" ".join(mode)) 7474 if not self._match(TokenType.COMMA): 7475 break 7476 7477 return self.expression(exp.Transaction, this=this, modes=modes) 7478 7479 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7480 chain = None 7481 savepoint = None 7482 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7483 7484 self._match_texts(("TRANSACTION", "WORK")) 7485 7486 if self._match_text_seq("TO"): 7487 self._match_text_seq("SAVEPOINT") 7488 savepoint = self._parse_id_var() 7489 7490 if self._match(TokenType.AND): 7491 chain = not self._match_text_seq("NO") 7492 self._match_text_seq("CHAIN") 7493 7494 if is_rollback: 7495 return self.expression(exp.Rollback, savepoint=savepoint) 7496 7497 return self.expression(exp.Commit, chain=chain) 7498 7499 def _parse_refresh(self) -> exp.Refresh: 7500 self._match(TokenType.TABLE) 7501 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7502 7503 def _parse_column_def_with_exists(self): 7504 start = self._index 7505 self._match(TokenType.COLUMN) 7506 7507 exists_column = self._parse_exists(not_=True) 7508 expression = self._parse_field_def() 7509 7510 if not isinstance(expression, exp.ColumnDef): 7511 self._retreat(start) 7512 return None 7513 7514 expression.set("exists", exists_column) 7515 7516 return expression 7517 7518 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7519 if not self._prev.text.upper() == "ADD": 7520 return None 7521 7522 expression = self._parse_column_def_with_exists() 7523 if not expression: 7524 return None 7525 7526 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7527 if self._match_texts(("FIRST", "AFTER")): 7528 position = self._prev.text 7529 column_position = self.expression( 7530 exp.ColumnPosition, this=self._parse_column(), position=position 7531 ) 7532 expression.set("position", column_position) 7533 7534 return 
expression 7535 7536 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7537 drop = self._match(TokenType.DROP) and self._parse_drop() 7538 if drop and not isinstance(drop, exp.Command): 7539 drop.set("kind", drop.args.get("kind", "COLUMN")) 7540 return drop 7541 7542 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7543 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7544 return self.expression( 7545 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7546 ) 7547 7548 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7549 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7550 self._match_text_seq("ADD") 7551 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7552 return self.expression( 7553 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7554 ) 7555 7556 column_def = self._parse_add_column() 7557 if isinstance(column_def, exp.ColumnDef): 7558 return column_def 7559 7560 exists = self._parse_exists(not_=True) 7561 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7562 return self.expression( 7563 exp.AddPartition, 7564 exists=exists, 7565 this=self._parse_field(any_token=True), 7566 location=self._match_text_seq("LOCATION", advance=False) 7567 and self._parse_property(), 7568 ) 7569 7570 return None 7571 7572 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7573 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7574 or self._match_text_seq("COLUMNS") 7575 ): 7576 schema = self._parse_schema() 7577 7578 return ( 7579 ensure_list(schema) 7580 if schema 7581 else self._parse_csv(self._parse_column_def_with_exists) 7582 ) 7583 7584 return self._parse_csv(_parse_add_alteration) 7585 7586 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7587 if self._match_texts(self.ALTER_ALTER_PARSERS): 7588 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7589 7590 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7591 # keyword after ALTER we default to parsing this statement 7592 self._match(TokenType.COLUMN) 7593 column = self._parse_field(any_token=True) 7594 7595 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7596 return self.expression(exp.AlterColumn, this=column, drop=True) 7597 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7598 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7599 if self._match(TokenType.COMMENT): 7600 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7601 if self._match_text_seq("DROP", "NOT", "NULL"): 7602 return self.expression( 7603 exp.AlterColumn, 7604 this=column, 7605 drop=True, 7606 allow_null=True, 7607 ) 7608 if self._match_text_seq("SET", "NOT", "NULL"): 7609 return self.expression( 7610 exp.AlterColumn, 7611 this=column, 7612 allow_null=False, 7613 ) 7614 7615 if self._match_text_seq("SET", "VISIBLE"): 7616 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7617 if self._match_text_seq("SET", "INVISIBLE"): 7618 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7619 7620 self._match_text_seq("SET", "DATA") 7621 self._match_text_seq("TYPE") 7622 return self.expression( 7623 exp.AlterColumn, 7624 this=column, 7625 dtype=self._parse_types(), 7626 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7627 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7628 ) 7629 7630 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7631 if self._match_texts(("ALL", "EVEN", "AUTO")): 7632 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7633 7634 self._match_text_seq("KEY", "DISTKEY") 7635 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7636 7637 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7638 if compound: 7639 self._match_text_seq("SORTKEY") 7640 7641 if self._match(TokenType.L_PAREN, advance=False): 7642 return self.expression( 7643 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7644 ) 7645 7646 self._match_texts(("AUTO", "NONE")) 7647 return self.expression( 7648 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7649 ) 7650 7651 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7652 index = self._index - 1 7653 7654 partition_exists = self._parse_exists() 7655 if self._match(TokenType.PARTITION, advance=False): 7656 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7657 7658 self._retreat(index) 7659 return self._parse_csv(self._parse_drop_column) 7660 7661 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7662 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7663 exists = self._parse_exists() 7664 old_column = self._parse_column() 7665 to = self._match_text_seq("TO") 7666 new_column = self._parse_column() 7667 7668 if old_column is None or to is None or new_column is None: 7669 return None 7670 7671 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7672 7673 self._match_text_seq("TO") 7674 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7675 7676 def _parse_alter_table_set(self) -> exp.AlterSet: 7677 alter_set = self.expression(exp.AlterSet) 7678 7679 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7680 "TABLE", "PROPERTIES" 7681 ): 7682 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7683 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7684 alter_set.set("expressions", [self._parse_assignment()]) 7685 elif self._match_texts(("LOGGED", "UNLOGGED")): 7686 alter_set.set("option", exp.var(self._prev.text.upper())) 7687 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7688 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7689 elif self._match_text_seq("LOCATION"): 7690 alter_set.set("location", self._parse_field()) 7691 elif self._match_text_seq("ACCESS", "METHOD"): 7692 alter_set.set("access_method", self._parse_field()) 7693 elif self._match_text_seq("TABLESPACE"): 7694 alter_set.set("tablespace", self._parse_field()) 7695 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7696 alter_set.set("file_format", [self._parse_field()]) 7697 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7698 alter_set.set("file_format", self._parse_wrapped_options()) 7699 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7700 alter_set.set("copy_options", self._parse_wrapped_options()) 7701 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7702 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7703 else: 7704 if self._match_text_seq("SERDE"): 7705 alter_set.set("serde", self._parse_field()) 7706 7707 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7708 alter_set.set("expressions", [properties]) 7709 7710 return alter_set 7711 7712 def _parse_alter_session(self) -> exp.AlterSession: 7713 """Parse ALTER SESSION SET/UNSET statements.""" 7714 if self._match(TokenType.SET): 7715 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7716 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7717 7718 self._match_text_seq("UNSET") 7719 expressions = self._parse_csv( 7720 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7721 ) 7722 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7723 7724 def _parse_alter(self) -> exp.Alter | exp.Command: 7725 start = self._prev 7726 7727 alter_token = self._match_set(self.ALTERABLES) and self._prev 7728 if not alter_token: 7729 return self._parse_as_command(start) 7730 7731 exists = self._parse_exists() 7732 only = self._match_text_seq("ONLY") 7733 7734 if alter_token.token_type == TokenType.SESSION: 7735 this = None 7736 check = None 7737 cluster = None 7738 else: 7739 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 7740 check = self._match_text_seq("WITH", "CHECK") 7741 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7742 7743 if self._next: 7744 self._advance() 7745 7746 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7747 if parser: 7748 actions = ensure_list(parser(self)) 7749 not_valid = self._match_text_seq("NOT", "VALID") 7750 options = self._parse_csv(self._parse_property) 7751 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 7752 7753 if not self._curr and actions: 7754 return self.expression( 7755 exp.Alter, 7756 this=this, 7757 kind=alter_token.text.upper(), 7758 exists=exists, 7759 actions=actions, 7760 only=only, 7761 options=options, 7762 cluster=cluster, 7763 not_valid=not_valid, 7764 check=check, 7765 cascade=cascade, 7766 ) 7767 7768 return self._parse_as_command(start) 7769 7770 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7771 start = self._prev 7772 # https://duckdb.org/docs/sql/statements/analyze 7773 if not self._curr: 7774 return self.expression(exp.Analyze) 7775 7776 options = [] 7777 while self._match_texts(self.ANALYZE_STYLES): 7778 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7779 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7780 else: 7781 options.append(self._prev.text.upper()) 7782 7783 this: t.Optional[exp.Expression] = None 7784 inner_expression: t.Optional[exp.Expression] = None 7785 7786 kind = self._curr and self._curr.text.upper() 7787 7788 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7789 this = self._parse_table_parts() 7790 elif self._match_text_seq("TABLES"): 7791 if self._match_set((TokenType.FROM, TokenType.IN)): 7792 kind = f"{kind} {self._prev.text.upper()}" 7793 this = self._parse_table(schema=True, is_db_reference=True) 7794 elif self._match_text_seq("DATABASE"): 7795 this = self._parse_table(schema=True, is_db_reference=True) 7796 elif self._match_text_seq("CLUSTER"): 7797 this = self._parse_table() 7798 # Try matching inner expr keywords before fallback to parse table. 
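# Illustrative sketch (editor's example): for Spark-style input such as
#     sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS", read="spark")
# the TABLE branch above sets `this`, and COMPUTE STATISTICS is expected to be
# picked up further down via ANALYZE_EXPRESSION_PARSERS as the inner expression.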
7799 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7800 kind = None 7801 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7802 else: 7803 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7804 kind = None 7805 this = self._parse_table_parts() 7806 7807 partition = self._try_parse(self._parse_partition) 7808 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7809 return self._parse_as_command(start) 7810 7811 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7812 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7813 "WITH", "ASYNC", "MODE" 7814 ): 7815 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7816 else: 7817 mode = None 7818 7819 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7820 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7821 7822 properties = self._parse_properties() 7823 return self.expression( 7824 exp.Analyze, 7825 kind=kind, 7826 this=this, 7827 mode=mode, 7828 partition=partition, 7829 properties=properties, 7830 expression=inner_expression, 7831 options=options, 7832 ) 7833 7834 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7835 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7836 this = None 7837 kind = self._prev.text.upper() 7838 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7839 expressions = [] 7840 7841 if not self._match_text_seq("STATISTICS"): 7842 self.raise_error("Expecting token STATISTICS") 7843 7844 if self._match_text_seq("NOSCAN"): 7845 this = "NOSCAN" 7846 elif self._match(TokenType.FOR): 7847 if self._match_text_seq("ALL", "COLUMNS"): 7848 this = "FOR ALL COLUMNS" 7849 if self._match_texts("COLUMNS"): 7850 this = "FOR COLUMNS" 7851 expressions = self._parse_csv(self._parse_column_reference) 7852 elif self._match_text_seq("SAMPLE"): 7853 sample = self._parse_number() 7854 expressions = [ 7855 self.expression( 7856 exp.AnalyzeSample, 7857 sample=sample, 7858 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7859 ) 7860 ] 7861 7862 return self.expression( 7863 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7864 ) 7865 7866 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7867 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7868 kind = None 7869 this = None 7870 expression: t.Optional[exp.Expression] = None 7871 if self._match_text_seq("REF", "UPDATE"): 7872 kind = "REF" 7873 this = "UPDATE" 7874 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7875 this = "UPDATE SET DANGLING TO NULL" 7876 elif self._match_text_seq("STRUCTURE"): 7877 kind = "STRUCTURE" 7878 if self._match_text_seq("CASCADE", "FAST"): 7879 this = "CASCADE FAST" 7880 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7881 ("ONLINE", "OFFLINE") 7882 ): 7883 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7884 expression = self._parse_into() 7885 7886 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7887 7888 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7889 this = self._prev.text.upper() 7890 if self._match_text_seq("COLUMNS"): 7891 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7892 return None 7893 7894 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7895 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7896 if self._match_text_seq("STATISTICS"): 7897 return self.expression(exp.AnalyzeDelete, kind=kind) 7898 return None 7899 7900 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7901 if self._match_text_seq("CHAINED", "ROWS"): 7902 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7903 return None 7904 7905 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7906 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7907 this = self._prev.text.upper() 7908 expression: t.Optional[exp.Expression] = None 7909 expressions = [] 7910 update_options = None 7911 7912 if self._match_text_seq("HISTOGRAM", "ON"): 7913 expressions = self._parse_csv(self._parse_column_reference) 7914 with_expressions = [] 7915 while self._match(TokenType.WITH): 7916 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7917 if self._match_texts(("SYNC", "ASYNC")): 7918 if self._match_text_seq("MODE", advance=False): 7919 with_expressions.append(f"{self._prev.text.upper()} MODE") 7920 self._advance() 7921 else: 7922 buckets = self._parse_number() 7923 if self._match_text_seq("BUCKETS"): 7924 with_expressions.append(f"{buckets} BUCKETS") 7925 if with_expressions: 7926 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7927 7928 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7929 TokenType.UPDATE, advance=False 7930 ): 7931 update_options = self._prev.text.upper() 7932 self._advance() 7933 elif self._match_text_seq("USING", "DATA"): 7934 expression = self.expression(exp.UsingData, this=self._parse_string()) 7935 7936 return self.expression( 7937 exp.AnalyzeHistogram, 7938 this=this, 7939 expressions=expressions, 7940 expression=expression, 7941 update_options=update_options, 7942 ) 7943 7944 def _parse_merge(self) -> exp.Merge: 7945 self._match(TokenType.INTO) 7946 target = self._parse_table() 7947 7948 if target and self._match(TokenType.ALIAS, advance=False): 7949 target.set("alias", self._parse_table_alias()) 7950 7951 self._match(TokenType.USING) 7952 using = self._parse_table() 7953 7954 self._match(TokenType.ON) 7955 on = self._parse_assignment() 7956 7957 return self.expression( 7958 exp.Merge, 7959 this=target, 7960 using=using, 7961 on=on, 7962 whens=self._parse_when_matched(), 7963 returning=self._parse_returning(), 7964 ) 7965 7966 def _parse_when_matched(self) -> exp.Whens: 7967 whens = [] 7968 7969 while self._match(TokenType.WHEN): 7970 matched = not self._match(TokenType.NOT) 7971 self._match_text_seq("MATCHED") 7972 source = ( 7973 False 7974 if self._match_text_seq("BY", "TARGET") 7975 else self._match_text_seq("BY", "SOURCE") 7976 ) 7977 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7978 7979 self._match(TokenType.THEN) 7980 7981 if self._match(TokenType.INSERT): 7982 this = self._parse_star() 7983 if this: 7984 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7985 else: 7986 then = self.expression( 7987 exp.Insert, 7988 this=exp.var("ROW") 7989 if self._match_text_seq("ROW") 7990 else self._parse_value(values=False), 7991 expression=self._match_text_seq("VALUES") and self._parse_value(), 7992 ) 7993 elif self._match(TokenType.UPDATE): 7994 expressions = self._parse_star() 7995 if expressions: 7996 then = self.expression(exp.Update, expressions=expressions) 7997 else: 7998 then = self.expression( 7999 exp.Update, 8000 
expressions=self._match(TokenType.SET) 8001 and self._parse_csv(self._parse_equality), 8002 ) 8003 elif self._match(TokenType.DELETE): 8004 then = self.expression(exp.Var, this=self._prev.text) 8005 else: 8006 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8007 8008 whens.append( 8009 self.expression( 8010 exp.When, 8011 matched=matched, 8012 source=source, 8013 condition=condition, 8014 then=then, 8015 ) 8016 ) 8017 return self.expression(exp.Whens, expressions=whens) 8018 8019 def _parse_show(self) -> t.Optional[exp.Expression]: 8020 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8021 if parser: 8022 return parser(self) 8023 return self._parse_as_command(self._prev) 8024 8025 def _parse_set_item_assignment( 8026 self, kind: t.Optional[str] = None 8027 ) -> t.Optional[exp.Expression]: 8028 index = self._index 8029 8030 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8031 return self._parse_set_transaction(global_=kind == "GLOBAL") 8032 8033 left = self._parse_primary() or self._parse_column() 8034 assignment_delimiter = self._match_texts(("=", "TO")) 8035 8036 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8037 self._retreat(index) 8038 return None 8039 8040 right = self._parse_statement() or self._parse_id_var() 8041 if isinstance(right, (exp.Column, exp.Identifier)): 8042 right = exp.var(right.name) 8043 8044 this = self.expression(exp.EQ, this=left, expression=right) 8045 return self.expression(exp.SetItem, this=this, kind=kind) 8046 8047 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8048 self._match_text_seq("TRANSACTION") 8049 characteristics = self._parse_csv( 8050 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8051 ) 8052 return self.expression( 8053 exp.SetItem, 8054 expressions=characteristics, 8055 kind="TRANSACTION", 8056 **{"global": global_}, # type: ignore 8057 ) 8058 8059 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8060 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8061 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8062 8063 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8064 index = self._index 8065 set_ = self.expression( 8066 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8067 ) 8068 8069 if self._curr: 8070 self._retreat(index) 8071 return self._parse_as_command(self._prev) 8072 8073 return set_ 8074 8075 def _parse_var_from_options( 8076 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8077 ) -> t.Optional[exp.Var]: 8078 start = self._curr 8079 if not start: 8080 return None 8081 8082 option = start.text.upper() 8083 continuations = options.get(option) 8084 8085 index = self._index 8086 self._advance() 8087 for keywords in continuations or []: 8088 if isinstance(keywords, str): 8089 keywords = (keywords,) 8090 8091 if self._match_text_seq(*keywords): 8092 option = f"{option} {' '.join(keywords)}" 8093 break 8094 else: 8095 if continuations or continuations is None: 8096 if raise_unmatched: 8097 self.raise_error(f"Unknown option {option}") 8098 8099 self._retreat(index) 8100 return None 8101 8102 return exp.var(option) 8103 8104 def _parse_as_command(self, start: Token) -> exp.Command: 8105 while self._curr: 8106 self._advance() 8107 text = self._find_sql(start, self._prev) 8108 size = len(start.text) 8109 self._warn_unsupported() 8110 return exp.Command(this=text[:size], 
expression=text[size:]) 8111 8112 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8113 settings = [] 8114 8115 self._match_l_paren() 8116 kind = self._parse_id_var() 8117 8118 if self._match(TokenType.L_PAREN): 8119 while True: 8120 key = self._parse_id_var() 8121 value = self._parse_primary() 8122 if not key and value is None: 8123 break 8124 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8125 self._match(TokenType.R_PAREN) 8126 8127 self._match_r_paren() 8128 8129 return self.expression( 8130 exp.DictProperty, 8131 this=this, 8132 kind=kind.this if kind else None, 8133 settings=settings, 8134 ) 8135 8136 def _parse_dict_range(self, this: str) -> exp.DictRange: 8137 self._match_l_paren() 8138 has_min = self._match_text_seq("MIN") 8139 if has_min: 8140 min = self._parse_var() or self._parse_primary() 8141 self._match_text_seq("MAX") 8142 max = self._parse_var() or self._parse_primary() 8143 else: 8144 max = self._parse_var() or self._parse_primary() 8145 min = exp.Literal.number(0) 8146 self._match_r_paren() 8147 return self.expression(exp.DictRange, this=this, min=min, max=max) 8148 8149 def _parse_comprehension( 8150 self, this: t.Optional[exp.Expression] 8151 ) -> t.Optional[exp.Comprehension]: 8152 index = self._index 8153 expression = self._parse_column() 8154 if not self._match(TokenType.IN): 8155 self._retreat(index - 1) 8156 return None 8157 iterator = self._parse_column() 8158 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8159 return self.expression( 8160 exp.Comprehension, 8161 this=this, 8162 expression=expression, 8163 iterator=iterator, 8164 condition=condition, 8165 ) 8166 8167 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8168 if self._match(TokenType.HEREDOC_STRING): 8169 return self.expression(exp.Heredoc, this=self._prev.text) 8170 8171 if not self._match_text_seq("$"): 8172 return None 8173 8174 tags = ["$"] 8175 tag_text = None 8176 8177 if self._is_connected(): 8178 self._advance() 8179 tags.append(self._prev.text.upper()) 8180 else: 8181 self.raise_error("No closing $ found") 8182 8183 if tags[-1] != "$": 8184 if self._is_connected() and self._match_text_seq("$"): 8185 tag_text = tags[-1] 8186 tags.append("$") 8187 else: 8188 self.raise_error("No closing $ found") 8189 8190 heredoc_start = self._curr 8191 8192 while self._curr: 8193 if self._match_text_seq(*tags, advance=False): 8194 this = self._find_sql(heredoc_start, self._prev) 8195 self._advance(len(tags)) 8196 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8197 8198 self._advance() 8199 8200 self.raise_error(f"No closing {''.join(tags)} found") 8201 return None 8202 8203 def _find_parser( 8204 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8205 ) -> t.Optional[t.Callable]: 8206 if not self._curr: 8207 return None 8208 8209 index = self._index 8210 this = [] 8211 while True: 8212 # The current token might be multiple words 8213 curr = self._curr.text.upper() 8214 key = curr.split(" ") 8215 this.append(curr) 8216 8217 self._advance() 8218 result, trie = in_trie(trie, key) 8219 if result == TrieResult.FAILED: 8220 break 8221 8222 if result == TrieResult.EXISTS: 8223 subparser = parsers[" ".join(this)] 8224 return subparser 8225 8226 self._retreat(index) 8227 return None 8228 8229 def _match(self, token_type, advance=True, expression=None): 8230 if not self._curr: 8231 return None 8232 8233 if self._curr.token_type == token_type: 8234 if advance: 8235 self._advance() 8236 self._add_comments(expression) 8237 return 
True 8238 8239 return None 8240 8241 def _match_set(self, types, advance=True): 8242 if not self._curr: 8243 return None 8244 8245 if self._curr.token_type in types: 8246 if advance: 8247 self._advance() 8248 return True 8249 8250 return None 8251 8252 def _match_pair(self, token_type_a, token_type_b, advance=True): 8253 if not self._curr or not self._next: 8254 return None 8255 8256 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8257 if advance: 8258 self._advance(2) 8259 return True 8260 8261 return None 8262 8263 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8264 if not self._match(TokenType.L_PAREN, expression=expression): 8265 self.raise_error("Expecting (") 8266 8267 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8268 if not self._match(TokenType.R_PAREN, expression=expression): 8269 self.raise_error("Expecting )") 8270 8271 def _match_texts(self, texts, advance=True): 8272 if ( 8273 self._curr 8274 and self._curr.token_type != TokenType.STRING 8275 and self._curr.text.upper() in texts 8276 ): 8277 if advance: 8278 self._advance() 8279 return True 8280 return None 8281 8282 def _match_text_seq(self, *texts, advance=True): 8283 index = self._index 8284 for text in texts: 8285 if ( 8286 self._curr 8287 and self._curr.token_type != TokenType.STRING 8288 and self._curr.text.upper() == text 8289 ): 8290 self._advance() 8291 else: 8292 self._retreat(index) 8293 return None 8294 8295 if not advance: 8296 self._retreat(index) 8297 8298 return True 8299 8300 def _replace_lambda( 8301 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8302 ) -> t.Optional[exp.Expression]: 8303 if not node: 8304 return node 8305 8306 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8307 8308 for column in node.find_all(exp.Column): 8309 typ = lambda_types.get(column.parts[0].name) 8310 if typ is not None: 8311 dot_or_id = column.to_dot() if column.table else column.this 8312 8313 if typ: 8314 dot_or_id = self.expression( 8315 exp.Cast, 8316 this=dot_or_id, 8317 to=typ, 8318 ) 8319 8320 parent = column.parent 8321 8322 while isinstance(parent, exp.Dot): 8323 if not isinstance(parent.parent, exp.Dot): 8324 parent.replace(dot_or_id) 8325 break 8326 parent = parent.parent 8327 else: 8328 if column is node: 8329 node = dot_or_id 8330 else: 8331 column.replace(dot_or_id) 8332 return node 8333 8334 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8335 start = self._prev 8336 8337 # Not to be confused with TRUNCATE(number, decimals) function call 8338 if self._match(TokenType.L_PAREN): 8339 self._retreat(self._index - 2) 8340 return self._parse_function() 8341 8342 # Clickhouse supports TRUNCATE DATABASE as well 8343 is_database = self._match(TokenType.DATABASE) 8344 8345 self._match(TokenType.TABLE) 8346 8347 exists = self._parse_exists(not_=False) 8348 8349 expressions = self._parse_csv( 8350 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8351 ) 8352 8353 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8354 8355 if self._match_text_seq("RESTART", "IDENTITY"): 8356 identity = "RESTART" 8357 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8358 identity = "CONTINUE" 8359 else: 8360 identity = None 8361 8362 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8363 option = self._prev.text 8364 else: 8365 option = None 8366 8367 partition = self._parse_partition() 
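# Illustrative sketch (editor's example, default dialect):
#     sqlglot.parse_one("TRUNCATE TABLE a, b RESTART IDENTITY CASCADE")
# is expected to yield an exp.TruncateTable with two table expressions,
# identity="RESTART" and option="CASCADE".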
8368 8369 # Fallback case 8370 if self._curr: 8371 return self._parse_as_command(start) 8372 8373 return self.expression( 8374 exp.TruncateTable, 8375 expressions=expressions, 8376 is_database=is_database, 8377 exists=exists, 8378 cluster=cluster, 8379 identity=identity, 8380 option=option, 8381 partition=partition, 8382 ) 8383 8384 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8385 this = self._parse_ordered(self._parse_opclass) 8386 8387 if not self._match(TokenType.WITH): 8388 return this 8389 8390 op = self._parse_var(any_token=True) 8391 8392 return self.expression(exp.WithOperator, this=this, op=op) 8393 8394 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8395 self._match(TokenType.EQ) 8396 self._match(TokenType.L_PAREN) 8397 8398 opts: t.List[t.Optional[exp.Expression]] = [] 8399 option: exp.Expression | None 8400 while self._curr and not self._match(TokenType.R_PAREN): 8401 if self._match_text_seq("FORMAT_NAME", "="): 8402 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8403 option = self._parse_format_name() 8404 else: 8405 option = self._parse_property() 8406 8407 if option is None: 8408 self.raise_error("Unable to parse option") 8409 break 8410 8411 opts.append(option) 8412 8413 return opts 8414 8415 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8416 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8417 8418 options = [] 8419 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8420 option = self._parse_var(any_token=True) 8421 prev = self._prev.text.upper() 8422 8423 # Different dialects might separate options and values by white space, "=" and "AS" 8424 self._match(TokenType.EQ) 8425 self._match(TokenType.ALIAS) 8426 8427 param = self.expression(exp.CopyParameter, this=option) 8428 8429 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8430 TokenType.L_PAREN, advance=False 8431 ): 8432 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8433 param.set("expressions", self._parse_wrapped_options()) 8434 elif prev == "FILE_FORMAT": 8435 # T-SQL's external file format case 8436 param.set("expression", self._parse_field()) 8437 else: 8438 param.set("expression", self._parse_unquoted_field()) 8439 8440 options.append(param) 8441 self._match(sep) 8442 8443 return options 8444 8445 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8446 expr = self.expression(exp.Credentials) 8447 8448 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8449 expr.set("storage", self._parse_field()) 8450 if self._match_text_seq("CREDENTIALS"): 8451 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8452 creds = ( 8453 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8454 ) 8455 expr.set("credentials", creds) 8456 if self._match_text_seq("ENCRYPTION"): 8457 expr.set("encryption", self._parse_wrapped_options()) 8458 if self._match_text_seq("IAM_ROLE"): 8459 expr.set("iam_role", self._parse_field()) 8460 if self._match_text_seq("REGION"): 8461 expr.set("region", self._parse_field()) 8462 8463 return expr 8464 8465 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8466 return self._parse_field() 8467 8468 def _parse_copy(self) -> exp.Copy | exp.Command: 8469 start = self._prev 8470 8471 self._match(TokenType.INTO) 8472 8473 this = ( 8474 self._parse_select(nested=True, parse_subquery_alias=False) 8475 if self._match(TokenType.L_PAREN, advance=False) 8476 else self._parse_table(schema=True) 
8477 ) 8478 8479 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8480 8481 files = self._parse_csv(self._parse_file_location) 8482 if self._match(TokenType.EQ, advance=False): 8483 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8484 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8485 # list via `_parse_wrapped(..)` below. 8486 self._advance(-1) 8487 files = [] 8488 8489 credentials = self._parse_credentials() 8490 8491 self._match_text_seq("WITH") 8492 8493 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8494 8495 # Fallback case 8496 if self._curr: 8497 return self._parse_as_command(start) 8498 8499 return self.expression( 8500 exp.Copy, 8501 this=this, 8502 kind=kind, 8503 credentials=credentials, 8504 files=files, 8505 params=params, 8506 ) 8507 8508 def _parse_normalize(self) -> exp.Normalize: 8509 return self.expression( 8510 exp.Normalize, 8511 this=self._parse_bitwise(), 8512 form=self._match(TokenType.COMMA) and self._parse_var(), 8513 ) 8514 8515 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8516 args = self._parse_csv(lambda: self._parse_lambda()) 8517 8518 this = seq_get(args, 0) 8519 decimals = seq_get(args, 1) 8520 8521 return expr_type( 8522 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8523 ) 8524 8525 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8526 star_token = self._prev 8527 8528 if self._match_text_seq("COLUMNS", "(", advance=False): 8529 this = self._parse_function() 8530 if isinstance(this, exp.Columns): 8531 this.set("unpack", True) 8532 return this 8533 8534 return self.expression( 8535 exp.Star, 8536 **{ # type: ignore 8537 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8538 "replace": self._parse_star_op("REPLACE"), 8539 "rename": self._parse_star_op("RENAME"), 8540 }, 8541 ).update_positions(star_token) 8542 8543 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8544 privilege_parts = [] 8545 8546 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8547 # (end of privilege list) or L_PAREN (start of column list) are met 8548 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8549 privilege_parts.append(self._curr.text.upper()) 8550 self._advance() 8551 8552 this = exp.var(" ".join(privilege_parts)) 8553 expressions = ( 8554 self._parse_wrapped_csv(self._parse_column) 8555 if self._match(TokenType.L_PAREN, advance=False) 8556 else None 8557 ) 8558 8559 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8560 8561 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8562 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8563 principal = self._parse_id_var() 8564 8565 if not principal: 8566 return None 8567 8568 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8569 8570 def _parse_grant_revoke_common( 8571 self, 8572 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8573 privileges = self._parse_csv(self._parse_grant_privilege) 8574 8575 self._match(TokenType.ON) 8576 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8577 8578 # Attempt to parse the securable e.g. 
MySQL allows names 8579 # such as "foo.*", "*.*" which are not easily parseable yet 8580 securable = self._try_parse(self._parse_table_parts) 8581 8582 return privileges, kind, securable 8583 8584 def _parse_grant(self) -> exp.Grant | exp.Command: 8585 start = self._prev 8586 8587 privileges, kind, securable = self._parse_grant_revoke_common() 8588 8589 if not securable or not self._match_text_seq("TO"): 8590 return self._parse_as_command(start) 8591 8592 principals = self._parse_csv(self._parse_grant_principal) 8593 8594 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8595 8596 if self._curr: 8597 return self._parse_as_command(start) 8598 8599 return self.expression( 8600 exp.Grant, 8601 privileges=privileges, 8602 kind=kind, 8603 securable=securable, 8604 principals=principals, 8605 grant_option=grant_option, 8606 ) 8607 8608 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8609 start = self._prev 8610 8611 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8612 8613 privileges, kind, securable = self._parse_grant_revoke_common() 8614 8615 if not securable or not self._match_text_seq("FROM"): 8616 return self._parse_as_command(start) 8617 8618 principals = self._parse_csv(self._parse_grant_principal) 8619 8620 cascade = None 8621 if self._match_texts(("CASCADE", "RESTRICT")): 8622 cascade = self._prev.text.upper() 8623 8624 if self._curr: 8625 return self._parse_as_command(start) 8626 8627 return self.expression( 8628 exp.Revoke, 8629 privileges=privileges, 8630 kind=kind, 8631 securable=securable, 8632 principals=principals, 8633 grant_option=grant_option, 8634 cascade=cascade, 8635 ) 8636 8637 def _parse_overlay(self) -> exp.Overlay: 8638 return self.expression( 8639 exp.Overlay, 8640 **{ # type: ignore 8641 "this": self._parse_bitwise(), 8642 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8643 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8644 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8645 }, 8646 ) 8647 8648 def _parse_format_name(self) -> exp.Property: 8649 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8650 # for FILE_FORMAT = <format_name> 8651 return self.expression( 8652 exp.Property, 8653 this=exp.var("FORMAT_NAME"), 8654 value=self._parse_string() or self._parse_table_parts(), 8655 ) 8656 8657 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8658 args: t.List[exp.Expression] = [] 8659 8660 if self._match(TokenType.DISTINCT): 8661 args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()])) 8662 self._match(TokenType.COMMA) 8663 8664 args.extend(self._parse_function_args()) 8665 8666 return self.expression( 8667 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8668 ) 8669 8670 def _identifier_expression( 8671 self, token: t.Optional[Token] = None, **kwargs: t.Any 8672 ) -> exp.Identifier: 8673 token = token or self._prev 8674 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8675 expression.update_positions(token) 8676 return expression 8677 8678 def _build_pipe_cte( 8679 self, 8680 query: exp.Query, 8681 expressions: t.List[exp.Expression], 8682 alias_cte: t.Optional[exp.TableAlias] = None, 8683 ) -> exp.Select: 8684 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8685 if alias_cte: 8686 new_cte = alias_cte 8687 else: 8688 self._pipe_cte_counter += 1 8689 new_cte = f"__tmp{self._pipe_cte_counter}" 8690 8691 with_ = query.args.get("with") 8692 
ctes = with_.pop() if with_ else None 8693 8694 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8695 if ctes: 8696 new_select.set("with", ctes) 8697 8698 return new_select.with_(new_cte, as_=query, copy=False) 8699 8700 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8701 select = self._parse_select(consume_pipe=False) 8702 if not select: 8703 return query 8704 8705 return self._build_pipe_cte( 8706 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8707 ) 8708 8709 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8710 limit = self._parse_limit() 8711 offset = self._parse_offset() 8712 if limit: 8713 curr_limit = query.args.get("limit", limit) 8714 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8715 query.limit(limit, copy=False) 8716 if offset: 8717 curr_offset = query.args.get("offset") 8718 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8719 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8720 8721 return query 8722 8723 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8724 this = self._parse_assignment() 8725 if self._match_text_seq("GROUP", "AND", advance=False): 8726 return this 8727 8728 this = self._parse_alias(this) 8729 8730 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8731 return self._parse_ordered(lambda: this) 8732 8733 return this 8734 8735 def _parse_pipe_syntax_aggregate_group_order_by( 8736 self, query: exp.Select, group_by_exists: bool = True 8737 ) -> exp.Select: 8738 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8739 aggregates_or_groups, orders = [], [] 8740 for element in expr: 8741 if isinstance(element, exp.Ordered): 8742 this = element.this 8743 if isinstance(this, exp.Alias): 8744 element.set("this", this.args["alias"]) 8745 orders.append(element) 8746 else: 8747 this = element 8748 aggregates_or_groups.append(this) 8749 8750 if group_by_exists: 8751 query.select(*aggregates_or_groups, copy=False).group_by( 8752 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8753 copy=False, 8754 ) 8755 else: 8756 query.select(*aggregates_or_groups, append=False, copy=False) 8757 8758 if orders: 8759 return query.order_by(*orders, append=False, copy=False) 8760 8761 return query 8762 8763 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8764 self._match_text_seq("AGGREGATE") 8765 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8766 8767 if self._match(TokenType.GROUP_BY) or ( 8768 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8769 ): 8770 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8771 8772 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8773 8774 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8775 first_setop = self.parse_set_operation(this=query) 8776 if not first_setop: 8777 return None 8778 8779 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8780 expr = self._parse_paren() 8781 return expr.assert_is(exp.Subquery).unnest() if expr else None 8782 8783 first_setop.this.pop() 8784 8785 setops = [ 8786 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8787 *self._parse_csv(_parse_and_unwrap_query), 8788 ] 8789 8790 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8791 with_ = query.args.get("with") 
8792 ctes = with_.pop() if with_ else None 8793 8794 if isinstance(first_setop, exp.Union): 8795 query = query.union(*setops, copy=False, **first_setop.args) 8796 elif isinstance(first_setop, exp.Except): 8797 query = query.except_(*setops, copy=False, **first_setop.args) 8798 else: 8799 query = query.intersect(*setops, copy=False, **first_setop.args) 8800 8801 query.set("with", ctes) 8802 8803 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8804 8805 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8806 join = self._parse_join() 8807 if not join: 8808 return None 8809 8810 if isinstance(query, exp.Select): 8811 return query.join(join, copy=False) 8812 8813 return query 8814 8815 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8816 pivots = self._parse_pivots() 8817 if not pivots: 8818 return query 8819 8820 from_ = query.args.get("from") 8821 if from_: 8822 from_.this.set("pivots", pivots) 8823 8824 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8825 8826 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8827 self._match_text_seq("EXTEND") 8828 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8829 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8830 8831 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8832 sample = self._parse_table_sample() 8833 8834 with_ = query.args.get("with") 8835 if with_: 8836 with_.expressions[-1].this.set("sample", sample) 8837 else: 8838 query.set("sample", sample) 8839 8840 return query 8841 8842 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8843 if isinstance(query, exp.Subquery): 8844 query = exp.select("*").from_(query, copy=False) 8845 8846 if not query.args.get("from"): 8847 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8848 8849 while self._match(TokenType.PIPE_GT): 8850 start = self._curr 8851 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8852 if not parser: 8853 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8854 # keywords, making it tricky to disambiguate them without lookahead. The approach 8855 # here is to try and parse a set operation and if that fails, then try to parse a 8856 # join operator. If that fails as well, then the operator is not supported. 
8857 parsed_query = self._parse_pipe_syntax_set_operator(query) 8858 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8859 if not parsed_query: 8860 self._retreat(start) 8861 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8862 break 8863 query = parsed_query 8864 else: 8865 query = parser(self, query) 8866 8867 return query 8868 8869 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8870 vars = self._parse_csv(self._parse_id_var) 8871 if not vars: 8872 return None 8873 8874 return self.expression( 8875 exp.DeclareItem, 8876 this=vars, 8877 kind=self._parse_types(), 8878 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8879 ) 8880 8881 def _parse_declare(self) -> exp.Declare | exp.Command: 8882 start = self._prev 8883 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8884 8885 if not expressions or self._curr: 8886 return self._parse_as_command(start) 8887 8888 return self.expression(exp.Declare, expressions=expressions) 8889 8890 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8891 exp_class = exp.Cast if strict else exp.TryCast 8892 8893 if exp_class == exp.TryCast: 8894 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8895 8896 return self.expression(exp_class, **kwargs) 8897 8898 def _parse_json_value(self) -> exp.JSONValue: 8899 this = self._parse_bitwise() 8900 self._match(TokenType.COMMA) 8901 path = self._parse_bitwise() 8902 8903 returning = self._match(TokenType.RETURNING) and self._parse_type() 8904 8905 return self.expression( 8906 exp.JSONValue, 8907 this=this, 8908 path=self.dialect.to_json_path(path), 8909 returning=returning, 8910 on_condition=self._parse_on_condition(), 8911 ) 8912 8913 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8914 def concat_exprs( 8915 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8916 ) -> exp.Expression: 8917 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8918 concat_exprs = [ 8919 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8920 ] 8921 node.set("expressions", concat_exprs) 8922 return node 8923 if len(exprs) == 1: 8924 return exprs[0] 8925 return self.expression(exp.Concat, expressions=args, safe=True) 8926 8927 args = self._parse_csv(self._parse_lambda) 8928 8929 if args: 8930 order = args[-1] if isinstance(args[-1], exp.Order) else None 8931 8932 if order: 8933 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8934 # remove 'expr' from exp.Order and add it back to args 8935 args[-1] = order.this 8936 order.set("this", concat_exprs(order.this, args)) 8937 8938 this = order or concat_exprs(args[0], args) 8939 else: 8940 this = None 8941 8942 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8943 8944 return self.expression(exp.GroupConcat, this=this, separator=separator)
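A concrete way to exercise `_parse_group_concat` is through a dialect that routes GROUP_CONCAT to it, such as MySQL. A minimal sketch of the resulting tree, assuming sqlglot is importable (the ORDER BY becomes the `this` of the aggregate, with the concatenated expression re-attached beneath it, exactly as the comment in the method describes):

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> stmt = sqlglot.parse_one(
    ...     "SELECT GROUP_CONCAT(a ORDER BY b SEPARATOR '|') FROM t", read="mysql"
    ... )
    >>> gc = stmt.find(exp.GroupConcat)
    >>> isinstance(gc.this, exp.Order), gc.args["separator"].sql()
    (True, "'|'")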
32def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 33 if len(args) == 1 and args[0].is_star: 34 return exp.StarMap(this=args[0]) 35 36 keys = [] 37 values = [] 38 for i in range(0, len(args), 2): 39 keys.append(args[i]) 40 values.append(args[i + 1]) 41 42 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
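`build_var_map` backs the VAR_MAP entry of `Parser.FUNCTIONS` below: the flat argument list is split into parallel key and value arrays, or into an `exp.StarMap` when the only argument is a star. A short sketch of the shape it produces:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> node = sqlglot.parse_one("SELECT VAR_MAP('a', 1, 'b', 2)").find(exp.VarMap)
    >>> [k.sql() for k in node.args["keys"].expressions]
    ["'a'", "'b'"]
    >>> [v.sql() for v in node.args["values"].expressions]
    ['1', '2']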
50def binary_range_parser( 51 expr_type: t.Type[exp.Expression], reverse_args: bool = False 52) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 53 def _parse_binary_range( 54 self: Parser, this: t.Optional[exp.Expression] 55 ) -> t.Optional[exp.Expression]: 56 expression = self._parse_bitwise() 57 if reverse_args: 58 this, expression = expression, this 59 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 60 61 return _parse_binary_range
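`binary_range_parser` is a factory for the closures stored in `Parser.RANGE_PARSERS`, so one definition covers operators like GLOB, RLIKE and SIMILAR TO, with `reverse_args` handling operators whose operands read right-to-left. A sketch using the SQLite dialect, which recognizes GLOB:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> glob = sqlglot.parse_one(
    ...     "SELECT * FROM t WHERE name GLOB 'a*'", read="sqlite"
    ... ).find(exp.Glob)
    >>> glob.this.sql(), glob.expression.sql()
    ('name', "'a*'")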
64def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 65 # Default argument order is base, expression 66 this = seq_get(args, 0) 67 expression = seq_get(args, 1) 68 69 if expression: 70 if not dialect.LOG_BASE_FIRST: 71 this, expression = expression, this 72 return exp.Log(this=this, expression=expression) 73 74 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
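Both knobs here are dialect settings: `LOG_BASE_FIRST` fixes which argument is the base, and `LOG_DEFAULTS_TO_LN` decides what the single-argument form means. A sketch, assuming the MySQL dialect, whose parser treats single-argument LOG as a natural logarithm:

    >>> import sqlglot
    >>> sqlglot.parse_one("SELECT LOG(x)", read="mysql").sql()
    'SELECT LN(x)'
    >>> sqlglot.parse_one("SELECT LOG(2, x)", read="mysql").sql()
    'SELECT LOG(2, x)'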
94def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 95 def _builder(args: t.List, dialect: Dialect) -> E: 96 expression = expr_type( 97 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 98 ) 99 if len(args) > 2 and expr_type is exp.JSONExtract: 100 expression.set("expressions", args[2:]) 101 102 return expression 103 104 return _builder
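The returned `_builder` funnels the path argument through `dialect.to_json_path`, so the tree carries a structured `exp.JSONPath` node rather than a raw string; this is what makes JSON path syntax transpilable between dialects. A sketch:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> node = sqlglot.parse_one(
    ...     "SELECT JSON_EXTRACT(doc, '$.user.name')", read="mysql"
    ... ).find(exp.JSONExtract)
    >>> type(node.expression).__name__
    'JSONPath'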
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
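The wrapping matters because `exp.Mod` is typically generated as the `%` operator, which binds tighter than `+`; without the parentheses, `MOD(a + 1, 7)` would round-trip as `a + 1 % 7` and change meaning. A sketch in the default dialect:

    >>> import sqlglot
    >>> sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
    'SELECT (a + 1) % 7'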
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
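`bracket_notation` is only recorded when the dialect reports `HAS_DISTINCT_ARRAY_CONSTRUCTORS`, i.e. when `ARRAY[...]` and `[...]` build genuinely different values and the generator needs to reproduce the original spelling. A sketch calling the builder directly; `fake` below is a hypothetical stand-in for a real `Dialect` instance:

    >>> from types import SimpleNamespace
    >>> from sqlglot import exp
    >>> from sqlglot.parser import build_array_constructor
    >>> from sqlglot.tokens import TokenType
    >>> fake = SimpleNamespace(HAS_DISTINCT_ARRAY_CONSTRUCTORS=True)  # hypothetical dialect stub
    >>> node = build_array_constructor(
    ...     exp.Array, [exp.Literal.number(1), exp.Literal.number(2)], TokenType.L_BRACKET, fake
    ... )
    >>> node.args["bracket_notation"]
    True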
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
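With exactly two arguments, the source timezone slot is backfilled from `default_source_tz` (a dialect whose two-argument form implies a fixed source, e.g. UTC, passes that in); with three arguments the generic `from_arg_list` path applies. A sketch calling the builder directly:

    >>> from sqlglot import exp
    >>> from sqlglot.parser import build_convert_timezone
    >>> node = build_convert_timezone(
    ...     [exp.Literal.string("Asia/Tokyo"), exp.column("created_at")], default_source_tz="UTC"
    ... )
    >>> node.args["source_tz"].sql(), node.args["target_tz"].sql()
    ("'UTC'", "'Asia/Tokyo'")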
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.LOCK, 536 TokenType.MERGE, 537 TokenType.NATURAL, 538 TokenType.NEXT, 539 TokenType.OFFSET, 540 TokenType.OPERATOR, 541 TokenType.ORDINALITY, 542 TokenType.OVERLAPS, 543 TokenType.OVERWRITE, 544 TokenType.PARTITION, 545 TokenType.PERCENT, 546 TokenType.PIVOT, 547 TokenType.PRAGMA, 548 TokenType.PUT, 549 TokenType.RANGE, 550 TokenType.RECURSIVE, 551 TokenType.REFERENCES, 552 TokenType.REFRESH, 553 TokenType.RENAME, 554 TokenType.REPLACE, 555 TokenType.RIGHT, 556 TokenType.ROLLUP, 557 TokenType.ROW, 558 TokenType.ROWS, 559 TokenType.SEMI, 560 TokenType.SET, 561 TokenType.SETTINGS, 562 TokenType.SHOW, 563 TokenType.TEMPORARY, 564 TokenType.TOP, 565 
TokenType.TRUE, 566 TokenType.TRUNCATE, 567 TokenType.UNIQUE, 568 TokenType.UNNEST, 569 TokenType.UNPIVOT, 570 TokenType.UPDATE, 571 TokenType.USE, 572 TokenType.VOLATILE, 573 TokenType.WINDOW, 574 *ALTERABLES, 575 *CREATABLES, 576 *SUBQUERY_PREDICATES, 577 *TYPE_TOKENS, 578 *NO_PAREN_FUNCTIONS, 579 } 580 ID_VAR_TOKENS.remove(TokenType.UNION) 581 582 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 583 TokenType.ANTI, 584 TokenType.ASOF, 585 TokenType.FULL, 586 TokenType.LEFT, 587 TokenType.LOCK, 588 TokenType.NATURAL, 589 TokenType.RIGHT, 590 TokenType.SEMI, 591 TokenType.WINDOW, 592 } 593 594 ALIAS_TOKENS = ID_VAR_TOKENS 595 596 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 597 598 ARRAY_CONSTRUCTORS = { 599 "ARRAY": exp.Array, 600 "LIST": exp.List, 601 } 602 603 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 604 605 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 606 607 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 608 609 FUNC_TOKENS = { 610 TokenType.COLLATE, 611 TokenType.COMMAND, 612 TokenType.CURRENT_DATE, 613 TokenType.CURRENT_DATETIME, 614 TokenType.CURRENT_SCHEMA, 615 TokenType.CURRENT_TIMESTAMP, 616 TokenType.CURRENT_TIME, 617 TokenType.CURRENT_USER, 618 TokenType.FILTER, 619 TokenType.FIRST, 620 TokenType.FORMAT, 621 TokenType.GET, 622 TokenType.GLOB, 623 TokenType.IDENTIFIER, 624 TokenType.INDEX, 625 TokenType.ISNULL, 626 TokenType.ILIKE, 627 TokenType.INSERT, 628 TokenType.LIKE, 629 TokenType.MERGE, 630 TokenType.NEXT, 631 TokenType.OFFSET, 632 TokenType.PRIMARY_KEY, 633 TokenType.RANGE, 634 TokenType.REPLACE, 635 TokenType.RLIKE, 636 TokenType.ROW, 637 TokenType.UNNEST, 638 TokenType.VAR, 639 TokenType.LEFT, 640 TokenType.RIGHT, 641 TokenType.SEQUENCE, 642 TokenType.DATE, 643 TokenType.DATETIME, 644 TokenType.TABLE, 645 TokenType.TIMESTAMP, 646 TokenType.TIMESTAMPTZ, 647 TokenType.TRUNCATE, 648 TokenType.UTC_DATE, 649 TokenType.UTC_TIME, 650 TokenType.UTC_TIMESTAMP, 651 TokenType.WINDOW, 652 TokenType.XOR, 653 *TYPE_TOKENS, 654 *SUBQUERY_PREDICATES, 655 } 656 657 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.AND: exp.And, 659 } 660 661 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 662 TokenType.COLON_EQ: exp.PropertyEQ, 663 } 664 665 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 666 TokenType.OR: exp.Or, 667 } 668 669 EQUALITY = { 670 TokenType.EQ: exp.EQ, 671 TokenType.NEQ: exp.NEQ, 672 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 673 } 674 675 COMPARISON = { 676 TokenType.GT: exp.GT, 677 TokenType.GTE: exp.GTE, 678 TokenType.LT: exp.LT, 679 TokenType.LTE: exp.LTE, 680 } 681 682 BITWISE = { 683 TokenType.AMP: exp.BitwiseAnd, 684 TokenType.CARET: exp.BitwiseXor, 685 TokenType.PIPE: exp.BitwiseOr, 686 } 687 688 TERM = { 689 TokenType.DASH: exp.Sub, 690 TokenType.PLUS: exp.Add, 691 TokenType.MOD: exp.Mod, 692 TokenType.COLLATE: exp.Collate, 693 } 694 695 FACTOR = { 696 TokenType.DIV: exp.IntDiv, 697 TokenType.LR_ARROW: exp.Distance, 698 TokenType.SLASH: exp.Div, 699 TokenType.STAR: exp.Mul, 700 } 701 702 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 703 704 TIMES = { 705 TokenType.TIME, 706 TokenType.TIMETZ, 707 } 708 709 TIMESTAMPS = { 710 TokenType.TIMESTAMP, 711 TokenType.TIMESTAMPNTZ, 712 TokenType.TIMESTAMPTZ, 713 TokenType.TIMESTAMPLTZ, 714 *TIMES, 715 } 716 717 SET_OPERATIONS = { 718 TokenType.UNION, 719 TokenType.INTERSECT, 720 TokenType.EXCEPT, 721 } 722 723 JOIN_METHODS = { 724 TokenType.ASOF, 725 TokenType.NATURAL, 726 TokenType.POSITIONAL, 727 } 728 729 JOIN_SIDES = { 730 TokenType.LEFT, 731 
TokenType.RIGHT, 732 TokenType.FULL, 733 } 734 735 JOIN_KINDS = { 736 TokenType.ANTI, 737 TokenType.CROSS, 738 TokenType.INNER, 739 TokenType.OUTER, 740 TokenType.SEMI, 741 TokenType.STRAIGHT_JOIN, 742 } 743 744 JOIN_HINTS: t.Set[str] = set() 745 746 LAMBDAS = { 747 TokenType.ARROW: lambda self, expressions: self.expression( 748 exp.Lambda, 749 this=self._replace_lambda( 750 self._parse_assignment(), 751 expressions, 752 ), 753 expressions=expressions, 754 ), 755 TokenType.FARROW: lambda self, expressions: self.expression( 756 exp.Kwarg, 757 this=exp.var(expressions[0].name), 758 expression=self._parse_assignment(), 759 ), 760 } 761 762 COLUMN_OPERATORS = { 763 TokenType.DOT: None, 764 TokenType.DOTCOLON: lambda self, this, to: self.expression( 765 exp.JSONCast, 766 this=this, 767 to=to, 768 ), 769 TokenType.DCOLON: lambda self, this, to: self.build_cast( 770 strict=self.STRICT_CAST, this=this, to=to 771 ), 772 TokenType.ARROW: lambda self, this, path: self.expression( 773 exp.JSONExtract, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.DARROW: lambda self, this, path: self.expression( 779 exp.JSONExtractScalar, 780 this=this, 781 expression=self.dialect.to_json_path(path), 782 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 783 ), 784 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtract, 786 this=this, 787 expression=path, 788 ), 789 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 790 exp.JSONBExtractScalar, 791 this=this, 792 expression=path, 793 ), 794 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 795 exp.JSONBContains, 796 this=this, 797 expression=key, 798 ), 799 } 800 801 CAST_COLUMN_OPERATORS = { 802 TokenType.DOTCOLON, 803 TokenType.DCOLON, 804 } 805 806 EXPRESSION_PARSERS = { 807 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 808 exp.Column: lambda self: self._parse_column(), 809 exp.Condition: lambda self: self._parse_assignment(), 810 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 811 exp.Expression: lambda self: self._parse_expression(), 812 exp.From: lambda self: self._parse_from(joins=True), 813 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 814 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 815 exp.Group: lambda self: self._parse_group(), 816 exp.Having: lambda self: self._parse_having(), 817 exp.Hint: lambda self: self._parse_hint_body(), 818 exp.Identifier: lambda self: self._parse_id_var(), 819 exp.Join: lambda self: self._parse_join(), 820 exp.Lambda: lambda self: self._parse_lambda(), 821 exp.Lateral: lambda self: self._parse_lateral(), 822 exp.Limit: lambda self: self._parse_limit(), 823 exp.Offset: lambda self: self._parse_offset(), 824 exp.Order: lambda self: self._parse_order(), 825 exp.Ordered: lambda self: self._parse_ordered(), 826 exp.Properties: lambda self: self._parse_properties(), 827 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 828 exp.Qualify: lambda self: self._parse_qualify(), 829 exp.Returning: lambda self: self._parse_returning(), 830 exp.Select: lambda self: self._parse_select(), 831 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 832 exp.Table: lambda self: self._parse_table_parts(), 833 exp.TableAlias: lambda self: self._parse_table_alias(), 834 exp.Tuple: lambda self: self._parse_value(values=False), 835 exp.Whens: lambda self: 
self._parse_when_matched(), 836 exp.Where: lambda self: self._parse_where(), 837 exp.Window: lambda self: self._parse_named_window(), 838 exp.With: lambda self: self._parse_with(), 839 "JOIN_TYPE": lambda self: self._parse_join_parts(), 840 } 841 842 STATEMENT_PARSERS = { 843 TokenType.ALTER: lambda self: self._parse_alter(), 844 TokenType.ANALYZE: lambda self: self._parse_analyze(), 845 TokenType.BEGIN: lambda self: self._parse_transaction(), 846 TokenType.CACHE: lambda self: self._parse_cache(), 847 TokenType.COMMENT: lambda self: self._parse_comment(), 848 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 849 TokenType.COPY: lambda self: self._parse_copy(), 850 TokenType.CREATE: lambda self: self._parse_create(), 851 TokenType.DELETE: lambda self: self._parse_delete(), 852 TokenType.DESC: lambda self: self._parse_describe(), 853 TokenType.DESCRIBE: lambda self: self._parse_describe(), 854 TokenType.DROP: lambda self: self._parse_drop(), 855 TokenType.GRANT: lambda self: self._parse_grant(), 856 TokenType.REVOKE: lambda self: self._parse_revoke(), 857 TokenType.INSERT: lambda self: self._parse_insert(), 858 TokenType.KILL: lambda self: self._parse_kill(), 859 TokenType.LOAD: lambda self: self._parse_load(), 860 TokenType.MERGE: lambda self: self._parse_merge(), 861 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 862 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 863 TokenType.REFRESH: lambda self: self._parse_refresh(), 864 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 865 TokenType.SET: lambda self: self._parse_set(), 866 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 867 TokenType.UNCACHE: lambda self: self._parse_uncache(), 868 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 869 TokenType.UPDATE: lambda self: self._parse_update(), 870 TokenType.USE: lambda self: self._parse_use(), 871 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 872 } 873 874 UNARY_PARSERS = { 875 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 876 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 877 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 878 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 879 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 880 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 881 } 882 883 STRING_PARSERS = { 884 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 885 exp.RawString, this=token.text 886 ), 887 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 888 exp.National, this=token.text 889 ), 890 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 891 TokenType.STRING: lambda self, token: self.expression( 892 exp.Literal, this=token.text, is_string=True 893 ), 894 TokenType.UNICODE_STRING: lambda self, token: self.expression( 895 exp.UnicodeString, 896 this=token.text, 897 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 898 ), 899 } 900 901 NUMERIC_PARSERS = { 902 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 903 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 904 TokenType.HEX_STRING: lambda self, token: self.expression( 905 exp.HexString, 906 
this=token.text, 907 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 908 ), 909 TokenType.NUMBER: lambda self, token: self.expression( 910 exp.Literal, this=token.text, is_string=False 911 ), 912 } 913 914 PRIMARY_PARSERS = { 915 **STRING_PARSERS, 916 **NUMERIC_PARSERS, 917 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 918 TokenType.NULL: lambda self, _: self.expression(exp.Null), 919 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 920 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 921 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 922 TokenType.STAR: lambda self, _: self._parse_star_ops(), 923 } 924 925 PLACEHOLDER_PARSERS = { 926 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 927 TokenType.PARAMETER: lambda self: self._parse_parameter(), 928 TokenType.COLON: lambda self: ( 929 self.expression(exp.Placeholder, this=self._prev.text) 930 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 931 else None 932 ), 933 } 934 935 RANGE_PARSERS = { 936 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 937 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 938 TokenType.GLOB: binary_range_parser(exp.Glob), 939 TokenType.ILIKE: binary_range_parser(exp.ILike), 940 TokenType.IN: lambda self, this: self._parse_in(this), 941 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 942 TokenType.IS: lambda self, this: self._parse_is(this), 943 TokenType.LIKE: binary_range_parser(exp.Like), 944 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 945 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 946 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 947 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 948 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 949 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 950 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 951 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 952 } 953 954 PIPE_SYNTAX_TRANSFORM_PARSERS = { 955 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 956 "AS": lambda self, query: self._build_pipe_cte( 957 query, [exp.Star()], self._parse_table_alias() 958 ), 959 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 960 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 961 "ORDER BY": lambda self, query: query.order_by( 962 self._parse_order(), append=False, copy=False 963 ), 964 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 965 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 966 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 967 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 968 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 969 } 970 971 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 972 "ALLOWED_VALUES": lambda self: self.expression( 973 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 974 ), 975 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 976 "AUTO": lambda self: self._parse_auto_property(), 977 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 978 "BACKUP": lambda self: self.expression( 979 exp.BackupProperty, this=self._parse_var(any_token=True) 980 ), 981 
"BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 982 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 983 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 984 "CHECKSUM": lambda self: self._parse_checksum(), 985 "CLUSTER BY": lambda self: self._parse_cluster(), 986 "CLUSTERED": lambda self: self._parse_clustered_by(), 987 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 988 exp.CollateProperty, **kwargs 989 ), 990 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 991 "CONTAINS": lambda self: self._parse_contains_property(), 992 "COPY": lambda self: self._parse_copy_property(), 993 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 994 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 995 "DEFINER": lambda self: self._parse_definer(), 996 "DETERMINISTIC": lambda self: self.expression( 997 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 998 ), 999 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1000 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1001 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 1002 "DISTKEY": lambda self: self._parse_distkey(), 1003 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1004 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1005 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1006 "ENVIRONMENT": lambda self: self.expression( 1007 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1008 ), 1009 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1010 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1011 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1012 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1013 "FREESPACE": lambda self: self._parse_freespace(), 1014 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1015 "HEAP": lambda self: self.expression(exp.HeapProperty), 1016 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1017 "IMMUTABLE": lambda self: self.expression( 1018 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1019 ), 1020 "INHERITS": lambda self: self.expression( 1021 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1022 ), 1023 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1024 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1025 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1026 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1027 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1028 "LIKE": lambda self: self._parse_create_like(), 1029 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1030 "LOCK": lambda self: self._parse_locking(), 1031 "LOCKING": lambda self: self._parse_locking(), 1032 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1033 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1034 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1035 "MODIFIES": lambda self: self._parse_modifies_property(), 1036 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1037 "NO": lambda self: self._parse_no_property(), 1038 
"ON": lambda self: self._parse_on_property(), 1039 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1040 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1041 "PARTITION": lambda self: self._parse_partitioned_of(), 1042 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1043 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1044 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1045 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1046 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1047 "READS": lambda self: self._parse_reads_property(), 1048 "REMOTE": lambda self: self._parse_remote_with_connection(), 1049 "RETURNS": lambda self: self._parse_returns(), 1050 "STRICT": lambda self: self.expression(exp.StrictProperty), 1051 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1052 "ROW": lambda self: self._parse_row(), 1053 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1054 "SAMPLE": lambda self: self.expression( 1055 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1056 ), 1057 "SECURE": lambda self: self.expression(exp.SecureProperty), 1058 "SECURITY": lambda self: self._parse_security(), 1059 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1060 "SETTINGS": lambda self: self._parse_settings_property(), 1061 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1062 "SORTKEY": lambda self: self._parse_sortkey(), 1063 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1064 "STABLE": lambda self: self.expression( 1065 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1066 ), 1067 "STORED": lambda self: self._parse_stored(), 1068 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1069 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1070 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1071 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1072 "TO": lambda self: self._parse_to_table(), 1073 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1074 "TRANSFORM": lambda self: self.expression( 1075 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1076 ), 1077 "TTL": lambda self: self._parse_ttl(), 1078 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1079 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1080 "VOLATILE": lambda self: self._parse_volatile_property(), 1081 "WITH": lambda self: self._parse_with_property(), 1082 } 1083 1084 CONSTRAINT_PARSERS = { 1085 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1086 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1087 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1088 "CHARACTER SET": lambda self: self.expression( 1089 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1090 ), 1091 "CHECK": lambda self: self.expression( 1092 exp.CheckColumnConstraint, 1093 this=self._parse_wrapped(self._parse_assignment), 1094 enforced=self._match_text_seq("ENFORCED"), 1095 ), 1096 "COLLATE": lambda self: self.expression( 1097 exp.CollateColumnConstraint, 1098 this=self._parse_identifier() or self._parse_column(), 1099 ), 1100 "COMMENT": lambda self: self.expression( 1101 exp.CommentColumnConstraint, this=self._parse_string() 1102 ), 1103 "COMPRESS": 
lambda self: self._parse_compress(), 1104 "CLUSTERED": lambda self: self.expression( 1105 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "NONCLUSTERED": lambda self: self.expression( 1108 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1109 ), 1110 "DEFAULT": lambda self: self.expression( 1111 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1112 ), 1113 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1114 "EPHEMERAL": lambda self: self.expression( 1115 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1116 ), 1117 "EXCLUDE": lambda self: self.expression( 1118 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1119 ), 1120 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1121 "FORMAT": lambda self: self.expression( 1122 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1123 ), 1124 "GENERATED": lambda self: self._parse_generated_as_identity(), 1125 "IDENTITY": lambda self: self._parse_auto_increment(), 1126 "INLINE": lambda self: self._parse_inline(), 1127 "LIKE": lambda self: self._parse_create_like(), 1128 "NOT": lambda self: self._parse_not_constraint(), 1129 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1130 "ON": lambda self: ( 1131 self._match(TokenType.UPDATE) 1132 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1133 ) 1134 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1135 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1136 "PERIOD": lambda self: self._parse_period_for_system_time(), 1137 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1138 "REFERENCES": lambda self: self._parse_references(match=False), 1139 "TITLE": lambda self: self.expression( 1140 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1141 ), 1142 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1143 "UNIQUE": lambda self: self._parse_unique(), 1144 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1145 "WITH": lambda self: self.expression( 1146 exp.Properties, expressions=self._parse_wrapped_properties() 1147 ), 1148 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1149 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1150 } 1151 1152 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1153 if not self._match(TokenType.L_PAREN, advance=False): 1154 # Partitioning by bucket or truncate follows the syntax: 1155 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1156 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1157 self._retreat(self._index - 1) 1158 return None 1159 1160 klass = ( 1161 exp.PartitionedByBucket 1162 if self._prev.text.upper() == "BUCKET" 1163 else exp.PartitionByTruncate 1164 ) 1165 1166 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1167 this, expression = seq_get(args, 0), seq_get(args, 1) 1168 1169 if isinstance(this, exp.Literal): 1170 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1171 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1172 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1173 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1174 # 1175 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1176 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1177 this, expression = expression, this 1178 1179 return self.expression(klass, this=this, expression=expression) 1180 1181 ALTER_PARSERS = { 1182 "ADD": lambda self: self._parse_alter_table_add(), 1183 "AS": lambda self: self._parse_select(), 1184 "ALTER": lambda self: self._parse_alter_table_alter(), 1185 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1186 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1187 "DROP": lambda self: self._parse_alter_table_drop(), 1188 "RENAME": lambda self: self._parse_alter_table_rename(), 1189 "SET": lambda self: self._parse_alter_table_set(), 1190 "SWAP": lambda self: self.expression( 1191 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1192 ), 1193 } 1194 1195 ALTER_ALTER_PARSERS = { 1196 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1197 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1198 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1199 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1200 } 1201 1202 SCHEMA_UNNAMED_CONSTRAINTS = { 1203 "CHECK", 1204 "EXCLUDE", 1205 "FOREIGN KEY", 1206 "LIKE", 1207 "PERIOD", 1208 "PRIMARY KEY", 1209 "UNIQUE", 1210 "BUCKET", 1211 "TRUNCATE", 1212 } 1213 1214 NO_PAREN_FUNCTION_PARSERS = { 1215 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1216 "CASE": lambda self: self._parse_case(), 1217 "CONNECT_BY_ROOT": lambda self: self.expression( 1218 exp.ConnectByRoot, this=self._parse_column() 1219 ), 1220 "IF": lambda self: self._parse_if(), 1221 } 1222 1223 INVALID_FUNC_NAME_TOKENS = { 1224 TokenType.IDENTIFIER, 1225 TokenType.STRING, 1226 } 1227 1228 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1229 1230 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1231 1232 FUNCTION_PARSERS = { 1233 **{ 1234 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1235 }, 1236 **{ 1237 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1238 }, 1239 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1240 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1241 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1242 "DECODE": lambda self: self._parse_decode(), 1243 "EXTRACT": lambda self: self._parse_extract(), 1244 "FLOOR": lambda 
self: self._parse_ceil_floor(exp.Floor), 1245 "GAP_FILL": lambda self: self._parse_gap_fill(), 1246 "JSON_OBJECT": lambda self: self._parse_json_object(), 1247 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1248 "JSON_TABLE": lambda self: self._parse_json_table(), 1249 "MATCH": lambda self: self._parse_match_against(), 1250 "NORMALIZE": lambda self: self._parse_normalize(), 1251 "OPENJSON": lambda self: self._parse_open_json(), 1252 "OVERLAY": lambda self: self._parse_overlay(), 1253 "POSITION": lambda self: self._parse_position(), 1254 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1255 "STRING_AGG": lambda self: self._parse_string_agg(), 1256 "SUBSTRING": lambda self: self._parse_substring(), 1257 "TRIM": lambda self: self._parse_trim(), 1258 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1259 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1260 "XMLELEMENT": lambda self: self.expression( 1261 exp.XMLElement, 1262 this=self._match_text_seq("NAME") and self._parse_id_var(), 1263 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1264 ), 1265 "XMLTABLE": lambda self: self._parse_xml_table(), 1266 } 1267 1268 QUERY_MODIFIER_PARSERS = { 1269 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1270 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1271 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1272 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1273 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1274 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1275 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1276 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1277 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1278 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1279 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1280 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1281 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1282 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1283 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1284 TokenType.CLUSTER_BY: lambda self: ( 1285 "cluster", 1286 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1287 ), 1288 TokenType.DISTRIBUTE_BY: lambda self: ( 1289 "distribute", 1290 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1291 ), 1292 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1293 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1294 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1295 } 1296 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1297 1298 SET_PARSERS = { 1299 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1300 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1301 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1302 "TRANSACTION": lambda self: self._parse_set_transaction(), 1303 } 1304 1305 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1306 1307 TYPE_LITERAL_PARSERS = { 1308 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1309 } 1310 1311 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] 
= {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
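    # Illustrative only: OPTIONS_TYPE tables such as TRANSACTION_CHARACTERISTICS map a
    # leading keyword to the keyword sequences that may follow it; they are consumed by
    # _parse_var_from_options further below. A hedged doctest-style sketch, assuming the
    # default dialect round-trips this statement unchanged:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED").sql()
    #     'SET TRANSACTION ISOLATION LEVEL READ COMMITTED'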
    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {}

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
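    # Illustrative only: dialect-specific Parser subclasses override the flags above to
    # toggle grammar features. A hedged example (assumes Hive still enables string
    # aliases in your sqlglot version):
    #
    #     >>> from sqlglot.dialects.hive import Hive
    #     >>> Hive.Parser.STRING_ALIASES
    #     True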
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether ALTER statements are allowed to contain partition specifications
    ALTER_TABLE_PARTITIONS = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support joins without join criteria; adding an ON TRUE
    # makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
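    # Illustrative only: a hedged sketch of driving this class directly, mirroring what
    # Dialect.parse does internally (Dialect.get_or_raise, Dialect.tokenize and
    # Dialect.parser are assumed from sqlglot's dialect API; prefer sqlglot.parse_one
    # in application code):
    #
    #     >>> from sqlglot.dialects.dialect import Dialect
    #     >>> d = Dialect.get_or_raise("duckdb")
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> [e.sql() for e in d.parser().parse(d.tokenize(sql), sql)]
    #     ['SELECT 1', 'SELECT 2']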
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
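    # Illustrative only: a hedged doctest-style sketch of how the error plumbing above
    # surfaces to callers; the description text comes from the raise_error call sites,
    # and exact positions depend on the input:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.errors import ParseError
    #     >>> try:
    #     ...     sqlglot.parse_one("SELECT foo FROM (SELECT baz FROM t")
    #     ... except ParseError as e:
    #     ...     print(e.errors[0]["description"])
    #     Expecting )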
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return
self._parse_query_modifiers(expression) 1936 1937 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1938 start = self._prev 1939 temporary = self._match(TokenType.TEMPORARY) 1940 materialized = self._match_text_seq("MATERIALIZED") 1941 1942 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1943 if not kind: 1944 return self._parse_as_command(start) 1945 1946 concurrently = self._match_text_seq("CONCURRENTLY") 1947 if_exists = exists or self._parse_exists() 1948 1949 if kind == "COLUMN": 1950 this = self._parse_column() 1951 else: 1952 this = self._parse_table_parts( 1953 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1954 ) 1955 1956 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1957 1958 if self._match(TokenType.L_PAREN, advance=False): 1959 expressions = self._parse_wrapped_csv(self._parse_types) 1960 else: 1961 expressions = None 1962 1963 return self.expression( 1964 exp.Drop, 1965 exists=if_exists, 1966 this=this, 1967 expressions=expressions, 1968 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1969 temporary=temporary, 1970 materialized=materialized, 1971 cascade=self._match_text_seq("CASCADE"), 1972 constraints=self._match_text_seq("CONSTRAINTS"), 1973 purge=self._match_text_seq("PURGE"), 1974 cluster=cluster, 1975 concurrently=concurrently, 1976 ) 1977 1978 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1979 return ( 1980 self._match_text_seq("IF") 1981 and (not not_ or self._match(TokenType.NOT)) 1982 and self._match(TokenType.EXISTS) 1983 ) 1984 1985 def _parse_create(self) -> exp.Create | exp.Command: 1986 # Note: this can't be None because we've matched a statement parser 1987 start = self._prev 1988 1989 replace = ( 1990 start.token_type == TokenType.REPLACE 1991 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1992 or self._match_pair(TokenType.OR, TokenType.ALTER) 1993 ) 1994 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1995 1996 unique = self._match(TokenType.UNIQUE) 1997 1998 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1999 clustered = True 2000 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2001 "COLUMNSTORE" 2002 ): 2003 clustered = False 2004 else: 2005 clustered = None 2006 2007 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2008 self._advance() 2009 2010 properties = None 2011 create_token = self._match_set(self.CREATABLES) and self._prev 2012 2013 if not create_token: 2014 # exp.Properties.Location.POST_CREATE 2015 properties = self._parse_properties() 2016 create_token = self._match_set(self.CREATABLES) and self._prev 2017 2018 if not properties or not create_token: 2019 return self._parse_as_command(start) 2020 2021 concurrently = self._match_text_seq("CONCURRENTLY") 2022 exists = self._parse_exists(not_=True) 2023 this = None 2024 expression: t.Optional[exp.Expression] = None 2025 indexes = None 2026 no_schema_binding = None 2027 begin = None 2028 end = None 2029 clone = None 2030 2031 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2032 nonlocal properties 2033 if properties and temp_props: 2034 properties.expressions.extend(temp_props.expressions) 2035 elif temp_props: 2036 properties = temp_props 2037 2038 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2039 this = self._parse_user_defined_function(kind=create_token.token_type) 2040 2041 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
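            # Illustrative only (hedged): the usual path here is the plain
            # CREATE ... AS SELECT form, which round-trips unchanged, e.g.:
            #
            #     >>> import sqlglot
            #     >>> sqlglot.parse_one("CREATE TABLE t1 AS SELECT * FROM t2").sql()
            #     'CREATE TABLE t1 AS SELECT * FROM t2'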
2120 if not expression and has_alias: 2121 expression = self._try_parse(self._parse_table_parts) 2122 2123 if create_token.token_type == TokenType.TABLE: 2124 # exp.Properties.Location.POST_EXPRESSION 2125 extend_props(self._parse_properties()) 2126 2127 indexes = [] 2128 while True: 2129 index = self._parse_index() 2130 2131 # exp.Properties.Location.POST_INDEX 2132 extend_props(self._parse_properties()) 2133 if not index: 2134 break 2135 else: 2136 self._match(TokenType.COMMA) 2137 indexes.append(index) 2138 elif create_token.token_type == TokenType.VIEW: 2139 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2140 no_schema_binding = True 2141 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2142 extend_props(self._parse_properties()) 2143 2144 shallow = self._match_text_seq("SHALLOW") 2145 2146 if self._match_texts(self.CLONE_KEYWORDS): 2147 copy = self._prev.text.lower() == "copy" 2148 clone = self.expression( 2149 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2150 ) 2151 2152 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2153 return self._parse_as_command(start) 2154 2155 create_kind_text = create_token.text.upper() 2156 return self.expression( 2157 exp.Create, 2158 this=this, 2159 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2160 replace=replace, 2161 refresh=refresh, 2162 unique=unique, 2163 expression=expression, 2164 exists=exists, 2165 properties=properties, 2166 indexes=indexes, 2167 no_schema_binding=no_schema_binding, 2168 begin=begin, 2169 end=end, 2170 clone=clone, 2171 concurrently=concurrently, 2172 clustered=clustered, 2173 ) 2174 2175 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2176 seq = exp.SequenceProperties() 2177 2178 options = [] 2179 index = self._index 2180 2181 while self._curr: 2182 self._match(TokenType.COMMA) 2183 if self._match_text_seq("INCREMENT"): 2184 self._match_text_seq("BY") 2185 self._match_text_seq("=") 2186 seq.set("increment", self._parse_term()) 2187 elif self._match_text_seq("MINVALUE"): 2188 seq.set("minvalue", self._parse_term()) 2189 elif self._match_text_seq("MAXVALUE"): 2190 seq.set("maxvalue", self._parse_term()) 2191 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2192 self._match_text_seq("=") 2193 seq.set("start", self._parse_term()) 2194 elif self._match_text_seq("CACHE"): 2195 # T-SQL allows empty CACHE which is initialized dynamically 2196 seq.set("cache", self._parse_number() or True) 2197 elif self._match_text_seq("OWNED", "BY"): 2198 # "OWNED BY NONE" is the default 2199 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2200 else: 2201 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2202 if opt: 2203 options.append(opt) 2204 else: 2205 break 2206 2207 seq.set("options", options if options else None) 2208 return None if self._index == index else seq 2209 2210 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2211 # only used for teradata currently 2212 self._match(TokenType.COMMA) 2213 2214 kwargs = { 2215 "no": self._match_text_seq("NO"), 2216 "dual": self._match_text_seq("DUAL"), 2217 "before": self._match_text_seq("BEFORE"), 2218 "default": self._match_text_seq("DEFAULT"), 2219 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2220 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2221 "after": self._match_text_seq("AFTER"), 2222 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2223 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2224 } 2225 2226 if self._match_texts(self.PROPERTY_PARSERS): 2227 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2228 try: 2229 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2230 except TypeError: 2231 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2232 2233 return None 2234 2235 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2236 return self._parse_wrapped_csv(self._parse_property) 2237 2238 def _parse_property(self) -> t.Optional[exp.Expression]: 2239 if self._match_texts(self.PROPERTY_PARSERS): 2240 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2241 2242 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2243 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2244 2245 if self._match_text_seq("COMPOUND", "SORTKEY"): 2246 return self._parse_sortkey(compound=True) 2247 2248 if self._match_text_seq("SQL", "SECURITY"): 2249 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2250 2251 index = self._index 2252 2253 seq_props = self._parse_sequence_properties() 2254 if seq_props: 2255 return seq_props 2256 2257 self._retreat(index) 2258 key = self._parse_column() 2259 2260 if not self._match(TokenType.EQ): 2261 self._retreat(index) 2262 return None 2263 2264 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2265 if isinstance(key, exp.Column): 2266 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2267 2268 value = self._parse_bitwise() or self._parse_var(any_token=True) 2269 2270 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2271 if isinstance(value, exp.Column): 2272 value = exp.var(value.name) 2273 2274 return self.expression(exp.Property, this=key, value=value) 2275 2276 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2277 if self._match_text_seq("BY"): 2278 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2279 2280 self._match(TokenType.ALIAS) 2281 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2282 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2283 2284 return self.expression( 2285 exp.FileFormatProperty, 2286 this=( 2287 self.expression( 2288 exp.InputOutputFormat, 2289 input_format=input_format, 2290 output_format=output_format, 2291 ) 2292 if input_format or output_format 2293 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2294 ), 2295 hive_format=True, 2296 ) 2297 2298 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2299 field = self._parse_field() 2300 if isinstance(field, exp.Identifier) and not field.quoted: 2301 field = exp.var(field) 2302 2303 return field 2304 2305 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2306 self._match(TokenType.EQ) 2307 self._match(TokenType.ALIAS) 2308 2309 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2310 2311 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2312 properties = [] 2313 while True: 2314 if before: 2315 prop = self._parse_property_before() 2316 else: 2317 prop = self._parse_property() 2318 if not prop: 2319 break 2320 for p in ensure_list(prop): 2321 properties.append(p) 
2322 2323 if properties: 2324 return self.expression(exp.Properties, expressions=properties) 2325 2326 return None 2327 2328 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2329 return self.expression( 2330 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2331 ) 2332 2333 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2334 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2335 security_specifier = self._prev.text.upper() 2336 return self.expression(exp.SecurityProperty, this=security_specifier) 2337 return None 2338 2339 def _parse_settings_property(self) -> exp.SettingsProperty: 2340 return self.expression( 2341 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2342 ) 2343 2344 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2345 if self._index >= 2: 2346 pre_volatile_token = self._tokens[self._index - 2] 2347 else: 2348 pre_volatile_token = None 2349 2350 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2351 return exp.VolatileProperty() 2352 2353 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2354 2355 def _parse_retention_period(self) -> exp.Var: 2356 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2357 number = self._parse_number() 2358 number_str = f"{number} " if number else "" 2359 unit = self._parse_var(any_token=True) 2360 return exp.var(f"{number_str}{unit}") 2361 2362 def _parse_system_versioning_property( 2363 self, with_: bool = False 2364 ) -> exp.WithSystemVersioningProperty: 2365 self._match(TokenType.EQ) 2366 prop = self.expression( 2367 exp.WithSystemVersioningProperty, 2368 **{ # type: ignore 2369 "on": True, 2370 "with": with_, 2371 }, 2372 ) 2373 2374 if self._match_text_seq("OFF"): 2375 prop.set("on", False) 2376 return prop 2377 2378 self._match(TokenType.ON) 2379 if self._match(TokenType.L_PAREN): 2380 while self._curr and not self._match(TokenType.R_PAREN): 2381 if self._match_text_seq("HISTORY_TABLE", "="): 2382 prop.set("this", self._parse_table_parts()) 2383 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2384 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2385 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2386 prop.set("retention_period", self._parse_retention_period()) 2387 2388 self._match(TokenType.COMMA) 2389 2390 return prop 2391 2392 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2393 self._match(TokenType.EQ) 2394 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2395 prop = self.expression(exp.DataDeletionProperty, on=on) 2396 2397 if self._match(TokenType.L_PAREN): 2398 while self._curr and not self._match(TokenType.R_PAREN): 2399 if self._match_text_seq("FILTER_COLUMN", "="): 2400 prop.set("filter_column", self._parse_column()) 2401 elif self._match_text_seq("RETENTION_PERIOD", "="): 2402 prop.set("retention_period", self._parse_retention_period()) 2403 2404 self._match(TokenType.COMMA) 2405 2406 return prop 2407 2408 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2409 kind = "HASH" 2410 expressions: t.Optional[t.List[exp.Expression]] = None 2411 if self._match_text_seq("BY", "HASH"): 2412 expressions = self._parse_wrapped_csv(self._parse_id_var) 2413 elif self._match_text_seq("BY", "RANDOM"): 2414 kind = "RANDOM" 2415 2416 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2417 
buckets: t.Optional[exp.Expression] = None 2418 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2419 buckets = self._parse_number() 2420 2421 return self.expression( 2422 exp.DistributedByProperty, 2423 expressions=expressions, 2424 kind=kind, 2425 buckets=buckets, 2426 order=self._parse_order(), 2427 ) 2428 2429 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2430 self._match_text_seq("KEY") 2431 expressions = self._parse_wrapped_id_vars() 2432 return self.expression(expr_type, expressions=expressions) 2433 2434 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2435 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2436 prop = self._parse_system_versioning_property(with_=True) 2437 self._match_r_paren() 2438 return prop 2439 2440 if self._match(TokenType.L_PAREN, advance=False): 2441 return self._parse_wrapped_properties() 2442 2443 if self._match_text_seq("JOURNAL"): 2444 return self._parse_withjournaltable() 2445 2446 if self._match_texts(self.VIEW_ATTRIBUTES): 2447 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2448 2449 if self._match_text_seq("DATA"): 2450 return self._parse_withdata(no=False) 2451 elif self._match_text_seq("NO", "DATA"): 2452 return self._parse_withdata(no=True) 2453 2454 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2455 return self._parse_serde_properties(with_=True) 2456 2457 if self._match(TokenType.SCHEMA): 2458 return self.expression( 2459 exp.WithSchemaBindingProperty, 2460 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2461 ) 2462 2463 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2464 return self.expression( 2465 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2466 ) 2467 2468 if not self._next: 2469 return None 2470 2471 return self._parse_withisolatedloading() 2472 2473 def _parse_procedure_option(self) -> exp.Expression | None: 2474 if self._match_text_seq("EXECUTE", "AS"): 2475 return self.expression( 2476 exp.ExecuteAsProperty, 2477 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2478 or self._parse_string(), 2479 ) 2480 2481 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2482 2483 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2484 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2485 self._match(TokenType.EQ) 2486 2487 user = self._parse_id_var() 2488 self._match(TokenType.PARAMETER) 2489 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2490 2491 if not user or not host: 2492 return None 2493 2494 return exp.DefinerProperty(this=f"{user}@{host}") 2495 2496 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2497 self._match(TokenType.TABLE) 2498 self._match(TokenType.EQ) 2499 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2500 2501 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2502 return self.expression(exp.LogProperty, no=no) 2503 2504 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2505 return self.expression(exp.JournalProperty, **kwargs) 2506 2507 def _parse_checksum(self) -> exp.ChecksumProperty: 2508 self._match(TokenType.EQ) 2509 2510 on = None 2511 if self._match(TokenType.ON): 2512 on = True 2513 elif self._match_text_seq("OFF"): 2514 on = False 2515 2516 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2517 2518 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2519 return self.expression( 2520 exp.Cluster, 2521 expressions=( 2522 self._parse_wrapped_csv(self._parse_ordered) 2523 if wrapped 2524 else self._parse_csv(self._parse_ordered) 2525 ), 2526 ) 2527 2528 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2529 self._match_text_seq("BY") 2530 2531 self._match_l_paren() 2532 expressions = self._parse_csv(self._parse_column) 2533 self._match_r_paren() 2534 2535 if self._match_text_seq("SORTED", "BY"): 2536 self._match_l_paren() 2537 sorted_by = self._parse_csv(self._parse_ordered) 2538 self._match_r_paren() 2539 else: 2540 sorted_by = None 2541 2542 self._match(TokenType.INTO) 2543 buckets = self._parse_number() 2544 self._match_text_seq("BUCKETS") 2545 2546 return self.expression( 2547 exp.ClusteredByProperty, 2548 expressions=expressions, 2549 sorted_by=sorted_by, 2550 buckets=buckets, 2551 ) 2552 2553 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2554 if not self._match_text_seq("GRANTS"): 2555 self._retreat(self._index - 1) 2556 return None 2557 2558 return self.expression(exp.CopyGrantsProperty) 2559 2560 def _parse_freespace(self) -> exp.FreespaceProperty: 2561 self._match(TokenType.EQ) 2562 return self.expression( 2563 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2564 ) 2565 2566 def _parse_mergeblockratio( 2567 self, no: bool = False, default: bool = False 2568 ) -> exp.MergeBlockRatioProperty: 2569 if self._match(TokenType.EQ): 2570 return self.expression( 2571 exp.MergeBlockRatioProperty, 2572 this=self._parse_number(), 2573 percent=self._match(TokenType.PERCENT), 2574 ) 2575 2576 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2577 2578 def _parse_datablocksize( 2579 self, 2580 default: t.Optional[bool] = None, 2581 minimum: t.Optional[bool] = None, 2582 maximum: t.Optional[bool] = None, 2583 ) -> exp.DataBlocksizeProperty: 2584 self._match(TokenType.EQ) 2585 size = self._parse_number() 2586 2587 units = None 2588 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2589 units = self._prev.text 2590 2591 return self.expression( 2592 exp.DataBlocksizeProperty, 2593 size=size, 2594 units=units, 2595 default=default, 2596 minimum=minimum, 2597 maximum=maximum, 2598 ) 2599 2600 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2601 self._match(TokenType.EQ) 2602 always = self._match_text_seq("ALWAYS") 2603 manual = self._match_text_seq("MANUAL") 2604 never = self._match_text_seq("NEVER") 2605 default = self._match_text_seq("DEFAULT") 2606 2607 autotemp = None 2608 if self._match_text_seq("AUTOTEMP"): 2609 autotemp = self._parse_schema() 2610 2611 return self.expression( 2612 exp.BlockCompressionProperty, 2613 always=always, 2614 manual=manual, 2615 never=never, 2616 default=default, 2617 autotemp=autotemp, 2618 ) 2619 2620 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2621 index = self._index 2622 no = self._match_text_seq("NO") 2623 concurrent = self._match_text_seq("CONCURRENT") 2624 2625 if not self._match_text_seq("ISOLATED", "LOADING"): 2626 self._retreat(index) 2627 return None 2628 2629 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2630 return self.expression( 2631 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2632 ) 2633 2634 def _parse_locking(self) -> exp.LockingProperty: 2635 if self._match(TokenType.TABLE): 2636 kind = "TABLE" 2637 elif 
self._match(TokenType.VIEW): 2638 kind = "VIEW" 2639 elif self._match(TokenType.ROW): 2640 kind = "ROW" 2641 elif self._match_text_seq("DATABASE"): 2642 kind = "DATABASE" 2643 else: 2644 kind = None 2645 2646 if kind in ("DATABASE", "TABLE", "VIEW"): 2647 this = self._parse_table_parts() 2648 else: 2649 this = None 2650 2651 if self._match(TokenType.FOR): 2652 for_or_in = "FOR" 2653 elif self._match(TokenType.IN): 2654 for_or_in = "IN" 2655 else: 2656 for_or_in = None 2657 2658 if self._match_text_seq("ACCESS"): 2659 lock_type = "ACCESS" 2660 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2661 lock_type = "EXCLUSIVE" 2662 elif self._match_text_seq("SHARE"): 2663 lock_type = "SHARE" 2664 elif self._match_text_seq("READ"): 2665 lock_type = "READ" 2666 elif self._match_text_seq("WRITE"): 2667 lock_type = "WRITE" 2668 elif self._match_text_seq("CHECKSUM"): 2669 lock_type = "CHECKSUM" 2670 else: 2671 lock_type = None 2672 2673 override = self._match_text_seq("OVERRIDE") 2674 2675 return self.expression( 2676 exp.LockingProperty, 2677 this=this, 2678 kind=kind, 2679 for_or_in=for_or_in, 2680 lock_type=lock_type, 2681 override=override, 2682 ) 2683 2684 def _parse_partition_by(self) -> t.List[exp.Expression]: 2685 if self._match(TokenType.PARTITION_BY): 2686 return self._parse_csv(self._parse_assignment) 2687 return [] 2688 2689 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2690 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2691 if self._match_text_seq("MINVALUE"): 2692 return exp.var("MINVALUE") 2693 if self._match_text_seq("MAXVALUE"): 2694 return exp.var("MAXVALUE") 2695 return self._parse_bitwise() 2696 2697 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2698 expression = None 2699 from_expressions = None 2700 to_expressions = None 2701 2702 if self._match(TokenType.IN): 2703 this = self._parse_wrapped_csv(self._parse_bitwise) 2704 elif self._match(TokenType.FROM): 2705 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2706 self._match_text_seq("TO") 2707 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2708 elif self._match_text_seq("WITH", "(", "MODULUS"): 2709 this = self._parse_number() 2710 self._match_text_seq(",", "REMAINDER") 2711 expression = self._parse_number() 2712 self._match_r_paren() 2713 else: 2714 self.raise_error("Failed to parse partition bound spec.") 2715 2716 return self.expression( 2717 exp.PartitionBoundSpec, 2718 this=this, 2719 expression=expression, 2720 from_expressions=from_expressions, 2721 to_expressions=to_expressions, 2722 ) 2723 2724 # https://www.postgresql.org/docs/current/sql-createtable.html 2725 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2726 if not self._match_text_seq("OF"): 2727 self._retreat(self._index - 1) 2728 return None 2729 2730 this = self._parse_table(schema=True) 2731 2732 if self._match(TokenType.DEFAULT): 2733 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2734 elif self._match_text_seq("FOR", "VALUES"): 2735 expression = self._parse_partition_bound_spec() 2736 else: 2737 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2738 2739 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2740 2741 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2742 self._match(TokenType.EQ) 2743 return self.expression( 2744 exp.PartitionedByProperty, 2745 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2746 ) 2747 2748 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2749 if self._match_text_seq("AND", "STATISTICS"): 2750 statistics = True 2751 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2752 statistics = False 2753 else: 2754 statistics = None 2755 2756 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2757 2758 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2759 if self._match_text_seq("SQL"): 2760 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2761 return None 2762 2763 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2764 if self._match_text_seq("SQL", "DATA"): 2765 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2766 return None 2767 2768 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2769 if self._match_text_seq("PRIMARY", "INDEX"): 2770 return exp.NoPrimaryIndexProperty() 2771 if self._match_text_seq("SQL"): 2772 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2773 return None 2774 2775 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2776 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2777 return exp.OnCommitProperty() 2778 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2779 return exp.OnCommitProperty(delete=True) 2780 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2781 2782 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2783 if self._match_text_seq("SQL", "DATA"): 2784 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2785 return None 2786 2787 def _parse_distkey(self) -> exp.DistKeyProperty: 2788 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2789 2790 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2791 table = self._parse_table(schema=True) 2792 2793 options = [] 2794 while self._match_texts(("INCLUDING", "EXCLUDING")): 2795 this = self._prev.text.upper() 2796 2797 id_var = self._parse_id_var() 2798 if not id_var: 2799 return None 2800 2801 options.append( 2802 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2803 ) 2804 2805 return self.expression(exp.LikeProperty, this=table, expressions=options) 2806 2807 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2808 return self.expression( 2809 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2810 ) 2811 2812 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2813 self._match(TokenType.EQ) 2814 return self.expression( 2815 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2816 ) 2817 2818 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2819 self._match_text_seq("WITH", "CONNECTION") 2820 return self.expression( 2821 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2822 ) 2823 2824 def _parse_returns(self) -> exp.ReturnsProperty: 2825 value: t.Optional[exp.Expression] 2826 null = None 2827 is_table = self._match(TokenType.TABLE) 2828 2829 if is_table: 2830 if self._match(TokenType.LT): 2831 value = self.expression( 2832 exp.Schema, 2833 this="TABLE", 2834 expressions=self._parse_csv(self._parse_struct_types), 2835 ) 2836 if not self._match(TokenType.GT): 2837 self.raise_error("Expecting >") 2838 else: 2839 value = self._parse_schema(exp.var("TABLE")) 2840 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2841 null = True 2842 value = None 2843 else: 2844 value = self._parse_types() 2845 2846 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2847 2848 def _parse_describe(self) -> exp.Describe: 2849 kind = self._match_set(self.CREATABLES) and self._prev.text 2850 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2851 if self._match(TokenType.DOT): 2852 style = None 2853 self._retreat(self._index - 2) 2854 2855 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2856 2857 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2858 this = self._parse_statement() 2859 else: 2860 this = self._parse_table(schema=True) 2861 2862 properties = self._parse_properties() 2863 expressions = properties.expressions if properties else None 2864 partition = self._parse_partition() 2865 return self.expression( 2866 exp.Describe, 2867 this=this, 2868 style=style, 2869 kind=kind, 2870 expressions=expressions, 2871 partition=partition, 2872 format=format, 2873 ) 2874 2875 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2876 kind = self._prev.text.upper() 2877 expressions = [] 2878 2879 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2880 if self._match(TokenType.WHEN): 2881 expression = self._parse_disjunction() 2882 self._match(TokenType.THEN) 2883 else: 2884 expression = None 2885 2886 else_ = self._match(TokenType.ELSE) 2887 2888 if not self._match(TokenType.INTO): 2889 return None 2890 2891 return self.expression( 2892 exp.ConditionalInsert, 2893 this=self.expression( 2894 exp.Insert, 2895 this=self._parse_table(schema=True), 2896 expression=self._parse_derived_table_values(), 2897 ), 2898 expression=expression, 2899 else_=else_, 2900 ) 2901 2902 expression = parse_conditional_insert() 2903 while expression is not None: 2904 expressions.append(expression) 2905 expression = parse_conditional_insert() 2906 2907 return self.expression( 2908 exp.MultitableInserts, 2909 kind=kind, 2910 comments=comments, 2911 expressions=expressions, 2912 source=self._parse_table(), 2913 ) 2914 2915 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2916 comments = [] 2917 hint = self._parse_hint() 2918 overwrite = self._match(TokenType.OVERWRITE) 2919 ignore = self._match(TokenType.IGNORE) 2920 local = self._match_text_seq("LOCAL") 2921 alternative = None 2922 is_function = None 2923 2924 if self._match_text_seq("DIRECTORY"): 2925 this: t.Optional[exp.Expression] = self.expression( 2926 exp.Directory, 2927 this=self._parse_var_or_string(), 2928 local=local, 2929 row_format=self._parse_row_format(match_row=True), 2930 ) 2931 else: 2932 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2933 comments += ensure_list(self._prev_comments) 2934 return self._parse_multitable_inserts(comments) 2935 2936 if self._match(TokenType.OR): 2937 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2938 2939 self._match(TokenType.INTO) 2940 comments += ensure_list(self._prev_comments) 2941 self._match(TokenType.TABLE) 2942 is_function = self._match(TokenType.FUNCTION) 2943 2944 this = ( 2945 self._parse_table(schema=True, parse_partition=True) 2946 if not is_function 2947 else self._parse_function() 2948 ) 2949 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2950 this.set("alias", self._parse_table_alias()) 2951 2952 returning = self._parse_returning() 2953 2954 return self.expression( 2955 
exp.Insert, 2956 comments=comments, 2957 hint=hint, 2958 is_function=is_function, 2959 this=this, 2960 stored=self._match_text_seq("STORED") and self._parse_stored(), 2961 by_name=self._match_text_seq("BY", "NAME"), 2962 exists=self._parse_exists(), 2963 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2964 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2965 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2966 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2967 conflict=self._parse_on_conflict(), 2968 returning=returning or self._parse_returning(), 2969 overwrite=overwrite, 2970 alternative=alternative, 2971 ignore=ignore, 2972 source=self._match(TokenType.TABLE) and self._parse_table(), 2973 ) 2974 2975 def _parse_kill(self) -> exp.Kill: 2976 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2977 2978 return self.expression( 2979 exp.Kill, 2980 this=self._parse_primary(), 2981 kind=kind, 2982 ) 2983 2984 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2985 conflict = self._match_text_seq("ON", "CONFLICT") 2986 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2987 2988 if not conflict and not duplicate: 2989 return None 2990 2991 conflict_keys = None 2992 constraint = None 2993 2994 if conflict: 2995 if self._match_text_seq("ON", "CONSTRAINT"): 2996 constraint = self._parse_id_var() 2997 elif self._match(TokenType.L_PAREN): 2998 conflict_keys = self._parse_csv(self._parse_id_var) 2999 self._match_r_paren() 3000 3001 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3002 if self._prev.token_type == TokenType.UPDATE: 3003 self._match(TokenType.SET) 3004 expressions = self._parse_csv(self._parse_equality) 3005 else: 3006 expressions = None 3007 3008 return self.expression( 3009 exp.OnConflict, 3010 duplicate=duplicate, 3011 expressions=expressions, 3012 action=action, 3013 conflict_keys=conflict_keys, 3014 constraint=constraint, 3015 where=self._parse_where(), 3016 ) 3017 3018 def _parse_returning(self) -> t.Optional[exp.Returning]: 3019 if not self._match(TokenType.RETURNING): 3020 return None 3021 return self.expression( 3022 exp.Returning, 3023 expressions=self._parse_csv(self._parse_expression), 3024 into=self._match(TokenType.INTO) and self._parse_table_part(), 3025 ) 3026 3027 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3028 if not self._match(TokenType.FORMAT): 3029 return None 3030 return self._parse_row_format() 3031 3032 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3033 index = self._index 3034 with_ = with_ or self._match_text_seq("WITH") 3035 3036 if not self._match(TokenType.SERDE_PROPERTIES): 3037 self._retreat(index) 3038 return None 3039 return self.expression( 3040 exp.SerdeProperties, 3041 **{ # type: ignore 3042 "expressions": self._parse_wrapped_properties(), 3043 "with": with_, 3044 }, 3045 ) 3046 3047 def _parse_row_format( 3048 self, match_row: bool = False 3049 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3050 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3051 return None 3052 3053 if self._match_text_seq("SERDE"): 3054 this = self._parse_string() 3055 3056 serde_properties = self._parse_serde_properties() 3057 3058 return self.expression( 3059 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3060 ) 3061 3062 self._match_text_seq("DELIMITED") 3063 3064 kwargs = {} 3065 3066 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3067 kwargs["fields"] = self._parse_string() 3068 if self._match_text_seq("ESCAPED", "BY"): 3069 kwargs["escaped"] = self._parse_string() 3070 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3071 kwargs["collection_items"] = self._parse_string() 3072 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3073 kwargs["map_keys"] = self._parse_string() 3074 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3075 kwargs["lines"] = self._parse_string() 3076 if self._match_text_seq("NULL", "DEFINED", "AS"): 3077 kwargs["null"] = self._parse_string() 3078 3079 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3080 3081 def _parse_load(self) -> exp.LoadData | exp.Command: 3082 if self._match_text_seq("DATA"): 3083 local = self._match_text_seq("LOCAL") 3084 self._match_text_seq("INPATH") 3085 inpath = self._parse_string() 3086 overwrite = self._match(TokenType.OVERWRITE) 3087 self._match_pair(TokenType.INTO, TokenType.TABLE) 3088 3089 return self.expression( 3090 exp.LoadData, 3091 this=self._parse_table(schema=True), 3092 local=local, 3093 overwrite=overwrite, 3094 inpath=inpath, 3095 partition=self._parse_partition(), 3096 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3097 serde=self._match_text_seq("SERDE") and self._parse_string(), 3098 ) 3099 return self._parse_as_command(self._prev) 3100 3101 def _parse_delete(self) -> exp.Delete: 3102 # This handles MySQL's "Multiple-Table Syntax" 3103 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3104 tables = None 3105 if not self._match(TokenType.FROM, advance=False): 3106 tables = self._parse_csv(self._parse_table) or None 3107 3108 returning = self._parse_returning() 3109 3110 return self.expression( 3111 exp.Delete, 3112 tables=tables, 3113 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3114 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3115 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3116 where=self._parse_where(), 3117 returning=returning or self._parse_returning(), 3118 limit=self._parse_limit(), 3119 ) 3120 3121 def _parse_update(self) -> exp.Update: 3122 kwargs: t.Dict[str, t.Any] = { 3123 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3124 } 3125 while self._curr: 3126 if self._match(TokenType.SET): 3127 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3128 elif self._match(TokenType.RETURNING, advance=False): 3129 kwargs["returning"] = self._parse_returning() 3130 elif self._match(TokenType.FROM, advance=False): 3131 kwargs["from"] = self._parse_from(joins=True) 3132 elif self._match(TokenType.WHERE, advance=False): 3133 kwargs["where"] = self._parse_where() 3134 elif self._match(TokenType.ORDER_BY, advance=False): 3135 kwargs["order"] = self._parse_order() 3136 elif self._match(TokenType.LIMIT, advance=False): 3137 kwargs["limit"] = self._parse_limit() 3138 else: 3139 break 3140 3141 return self.expression(exp.Update, **kwargs) 3142 3143 def _parse_use(self) -> exp.Use: 3144 return self.expression( 3145 exp.Use, 3146 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3147 this=self._parse_table(schema=False), 3148 ) 3149 3150 def _parse_uncache(self) -> exp.Uncache: 3151 if not self._match(TokenType.TABLE): 3152 self.raise_error("Expecting TABLE after 
UNCACHE") 3153 3154 return self.expression( 3155 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3156 ) 3157 3158 def _parse_cache(self) -> exp.Cache: 3159 lazy = self._match_text_seq("LAZY") 3160 self._match(TokenType.TABLE) 3161 table = self._parse_table(schema=True) 3162 3163 options = [] 3164 if self._match_text_seq("OPTIONS"): 3165 self._match_l_paren() 3166 k = self._parse_string() 3167 self._match(TokenType.EQ) 3168 v = self._parse_string() 3169 options = [k, v] 3170 self._match_r_paren() 3171 3172 self._match(TokenType.ALIAS) 3173 return self.expression( 3174 exp.Cache, 3175 this=table, 3176 lazy=lazy, 3177 options=options, 3178 expression=self._parse_select(nested=True), 3179 ) 3180 3181 def _parse_partition(self) -> t.Optional[exp.Partition]: 3182 if not self._match_texts(self.PARTITION_KEYWORDS): 3183 return None 3184 3185 return self.expression( 3186 exp.Partition, 3187 subpartition=self._prev.text.upper() == "SUBPARTITION", 3188 expressions=self._parse_wrapped_csv(self._parse_assignment), 3189 ) 3190 3191 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3192 def _parse_value_expression() -> t.Optional[exp.Expression]: 3193 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3194 return exp.var(self._prev.text.upper()) 3195 return self._parse_expression() 3196 3197 if self._match(TokenType.L_PAREN): 3198 expressions = self._parse_csv(_parse_value_expression) 3199 self._match_r_paren() 3200 return self.expression(exp.Tuple, expressions=expressions) 3201 3202 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3203 expression = self._parse_expression() 3204 if expression: 3205 return self.expression(exp.Tuple, expressions=[expression]) 3206 return None 3207 3208 def _parse_projections(self) -> t.List[exp.Expression]: 3209 return self._parse_expressions() 3210 3211 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3212 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3213 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3214 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3215 ) 3216 elif self._match(TokenType.FROM): 3217 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3218 # Support parentheses for duckdb FROM-first syntax 3219 select = self._parse_select(from_=from_) 3220 if select: 3221 if not select.args.get("from"): 3222 select.set("from", from_) 3223 this = select 3224 else: 3225 this = exp.select("*").from_(t.cast(exp.From, from_)) 3226 else: 3227 this = ( 3228 self._parse_table(consume_pipe=True) 3229 if table 3230 else self._parse_select(nested=True, parse_set_operation=False) 3231 ) 3232 3233 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3234 # in case a modifier (e.g. 
join) follows 3235 if table and isinstance(this, exp.Values) and this.alias: 3236 alias = this.args["alias"].pop() 3237 this = exp.Table(this=this, alias=alias) 3238 3239 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3240 3241 return this 3242 3243 def _parse_select( 3244 self, 3245 nested: bool = False, 3246 table: bool = False, 3247 parse_subquery_alias: bool = True, 3248 parse_set_operation: bool = True, 3249 consume_pipe: bool = True, 3250 from_: t.Optional[exp.From] = None, 3251 ) -> t.Optional[exp.Expression]: 3252 query = self._parse_select_query( 3253 nested=nested, 3254 table=table, 3255 parse_subquery_alias=parse_subquery_alias, 3256 parse_set_operation=parse_set_operation, 3257 ) 3258 3259 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3260 if not query and from_: 3261 query = exp.select("*").from_(from_) 3262 if isinstance(query, exp.Query): 3263 query = self._parse_pipe_syntax_query(query) 3264 query = query.subquery(copy=False) if query and table else query 3265 3266 return query 3267 3268 def _parse_select_query( 3269 self, 3270 nested: bool = False, 3271 table: bool = False, 3272 parse_subquery_alias: bool = True, 3273 parse_set_operation: bool = True, 3274 ) -> t.Optional[exp.Expression]: 3275 cte = self._parse_with() 3276 3277 if cte: 3278 this = self._parse_statement() 3279 3280 if not this: 3281 self.raise_error("Failed to parse any statement following CTE") 3282 return cte 3283 3284 if "with" in this.arg_types: 3285 this.set("with", cte) 3286 else: 3287 self.raise_error(f"{this.key} does not support CTE") 3288 this = cte 3289 3290 return this 3291 3292 # duckdb supports a leading FROM x 3293 from_ = ( 3294 self._parse_from(consume_pipe=True) 3295 if self._match(TokenType.FROM, advance=False) 3296 else None 3297 ) 3298 3299 if self._match(TokenType.SELECT): 3300 comments = self._prev_comments 3301 3302 hint = self._parse_hint() 3303 3304 if self._next and not self._next.token_type == TokenType.DOT: 3305 all_ = self._match(TokenType.ALL) 3306 distinct = self._match_set(self.DISTINCT_TOKENS) 3307 else: 3308 all_, distinct = None, None 3309 3310 kind = ( 3311 self._match(TokenType.ALIAS) 3312 and self._match_texts(("STRUCT", "VALUE")) 3313 and self._prev.text.upper() 3314 ) 3315 3316 if distinct: 3317 distinct = self.expression( 3318 exp.Distinct, 3319 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3320 ) 3321 3322 if all_ and distinct: 3323 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3324 3325 operation_modifiers = [] 3326 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3327 operation_modifiers.append(exp.var(self._prev.text.upper())) 3328 3329 limit = self._parse_limit(top=True) 3330 projections = self._parse_projections() 3331 3332 this = self.expression( 3333 exp.Select, 3334 kind=kind, 3335 hint=hint, 3336 distinct=distinct, 3337 expressions=projections, 3338 limit=limit, 3339 operation_modifiers=operation_modifiers or None, 3340 ) 3341 this.comments = comments 3342 3343 into = self._parse_into() 3344 if into: 3345 this.set("into", into) 3346 3347 if not from_: 3348 from_ = self._parse_from() 3349 3350 if from_: 3351 this.set("from", from_) 3352 3353 this = self._parse_query_modifiers(this) 3354 elif (table or nested) and self._match(TokenType.L_PAREN): 3355 this = self._parse_wrapped_select(table=table) 3356 3357 # We return early here so that the UNION isn't attached to the subquery by the 3358 # following call to _parse_set_operations, but
instead becomes the parent node 3359 self._match_r_paren() 3360 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3361 elif self._match(TokenType.VALUES, advance=False): 3362 this = self._parse_derived_table_values() 3363 elif from_: 3364 this = exp.select("*").from_(from_.this, copy=False) 3365 elif self._match(TokenType.SUMMARIZE): 3366 table = self._match(TokenType.TABLE) 3367 this = self._parse_select() or self._parse_string() or self._parse_table() 3368 return self.expression(exp.Summarize, this=this, table=table) 3369 elif self._match(TokenType.DESCRIBE): 3370 this = self._parse_describe() 3371 elif self._match_text_seq("STREAM"): 3372 this = self._parse_function() 3373 if this: 3374 this = self.expression(exp.Stream, this=this) 3375 else: 3376 self._retreat(self._index - 1) 3377 else: 3378 this = None 3379 3380 return self._parse_set_operations(this) if parse_set_operation else this 3381 3382 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3383 self._match_text_seq("SEARCH") 3384 3385 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3386 3387 if not kind: 3388 return None 3389 3390 self._match_text_seq("FIRST", "BY") 3391 3392 return self.expression( 3393 exp.RecursiveWithSearch, 3394 kind=kind, 3395 this=self._parse_id_var(), 3396 expression=self._match_text_seq("SET") and self._parse_id_var(), 3397 using=self._match_text_seq("USING") and self._parse_id_var(), 3398 ) 3399 3400 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3401 if not skip_with_token and not self._match(TokenType.WITH): 3402 return None 3403 3404 comments = self._prev_comments 3405 recursive = self._match(TokenType.RECURSIVE) 3406 3407 last_comments = None 3408 expressions = [] 3409 while True: 3410 cte = self._parse_cte() 3411 if isinstance(cte, exp.CTE): 3412 expressions.append(cte) 3413 if last_comments: 3414 cte.add_comments(last_comments) 3415 3416 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3417 break 3418 else: 3419 self._match(TokenType.WITH) 3420 3421 last_comments = self._prev_comments 3422 3423 return self.expression( 3424 exp.With, 3425 comments=comments, 3426 expressions=expressions, 3427 recursive=recursive, 3428 search=self._parse_recursive_with_search(), 3429 ) 3430 3431 def _parse_cte(self) -> t.Optional[exp.CTE]: 3432 index = self._index 3433 3434 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3435 if not alias or not alias.this: 3436 self.raise_error("Expected CTE to have alias") 3437 3438 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3439 self._retreat(index) 3440 return None 3441 3442 comments = self._prev_comments 3443 3444 if self._match_text_seq("NOT", "MATERIALIZED"): 3445 materialized = False 3446 elif self._match_text_seq("MATERIALIZED"): 3447 materialized = True 3448 else: 3449 materialized = None 3450 3451 cte = self.expression( 3452 exp.CTE, 3453 this=self._parse_wrapped(self._parse_statement), 3454 alias=alias, 3455 materialized=materialized, 3456 comments=comments, 3457 ) 3458 3459 values = cte.this 3460 if isinstance(values, exp.Values): 3461 if values.alias: 3462 cte.set("this", exp.select("*").from_(values)) 3463 else: 3464 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3465 3466 return cte 3467 3468 def _parse_table_alias( 3469 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3470 ) -> t.Optional[exp.TableAlias]: 3471 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3472 # so this section tries to parse the clause version and if it fails, it treats the token 3473 # as an identifier (alias) 3474 if self._can_parse_limit_or_offset(): 3475 return None 3476 3477 any_token = self._match(TokenType.ALIAS) 3478 alias = ( 3479 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3480 or self._parse_string_as_identifier() 3481 ) 3482 3483 index = self._index 3484 if self._match(TokenType.L_PAREN): 3485 columns = self._parse_csv(self._parse_function_parameter) 3486 self._match_r_paren() if columns else self._retreat(index) 3487 else: 3488 columns = None 3489 3490 if not alias and not columns: 3491 return None 3492 3493 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3494 3495 # We bubble up comments from the Identifier to the TableAlias 3496 if isinstance(alias, exp.Identifier): 3497 table_alias.add_comments(alias.pop_comments()) 3498 3499 return table_alias 3500 3501 def _parse_subquery( 3502 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3503 ) -> t.Optional[exp.Subquery]: 3504 if not this: 3505 return None 3506 3507 return self.expression( 3508 exp.Subquery, 3509 this=this, 3510 pivots=self._parse_pivots(), 3511 alias=self._parse_table_alias() if parse_alias else None, 3512 sample=self._parse_table_sample(), 3513 ) 3514 3515 def _implicit_unnests_to_explicit(self, this: E) -> E: 3516 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3517 3518 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3519 for i, join in enumerate(this.args.get("joins") or []): 3520 table = join.this 3521 normalized_table = table.copy() 3522 normalized_table.meta["maybe_column"] = True 3523 normalized_table = _norm(normalized_table, dialect=self.dialect) 3524 3525 if isinstance(table, exp.Table) and not join.args.get("on"): 3526 if normalized_table.parts[0].name in refs: 3527 table_as_column = table.to_column() 3528 unnest = exp.Unnest(expressions=[table_as_column]) 3529 3530 # Table.to_column creates a parent Alias node that we want to convert to 3531 # a TableAlias and attach to the Unnest, so it matches the parser's output 3532 if isinstance(table.args.get("alias"), exp.TableAlias): 3533 table_as_column.replace(table_as_column.this) 3534 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3535 3536 table.replace(unnest) 3537 3538 refs.add(normalized_table.alias_or_name) 3539 3540 return this 3541 3542 @t.overload 3543 def _parse_query_modifiers(self, this: E) -> E: ... 3544 3545 @t.overload 3546 def _parse_query_modifiers(self, this: None) -> None: ... 
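# Sketch of what the modifier loop below does: for
#   SELECT a FROM t JOIN u USING (a) WHERE a > 0 ORDER BY a LIMIT 1
# the JOIN is appended to "joins", while WHERE, ORDER BY and LIMIT are each
# dispatched through QUERY_MODIFIER_PARSERS and attached to the Select node;
# repeating a clause raises a "Found multiple ... clauses" error.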
3547 3548 def _parse_query_modifiers(self, this): 3549 if isinstance(this, self.MODIFIABLES): 3550 for join in self._parse_joins(): 3551 this.append("joins", join) 3552 for lateral in iter(self._parse_lateral, None): 3553 this.append("laterals", lateral) 3554 3555 while True: 3556 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3557 modifier_token = self._curr 3558 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3559 key, expression = parser(self) 3560 3561 if expression: 3562 if this.args.get(key): 3563 self.raise_error( 3564 f"Found multiple '{modifier_token.text.upper()}' clauses", 3565 token=modifier_token, 3566 ) 3567 3568 this.set(key, expression) 3569 if key == "limit": 3570 offset = expression.args.pop("offset", None) 3571 3572 if offset: 3573 offset = exp.Offset(expression=offset) 3574 this.set("offset", offset) 3575 3576 limit_by_expressions = expression.expressions 3577 expression.set("expressions", None) 3578 offset.set("expressions", limit_by_expressions) 3579 continue 3580 break 3581 3582 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3583 this = self._implicit_unnests_to_explicit(this) 3584 3585 return this 3586 3587 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3588 start = self._curr 3589 while self._curr: 3590 self._advance() 3591 3592 end = self._tokens[self._index - 1] 3593 return exp.Hint(expressions=[self._find_sql(start, end)]) 3594 3595 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3596 return self._parse_function_call() 3597 3598 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3599 start_index = self._index 3600 should_fallback_to_string = False 3601 3602 hints = [] 3603 try: 3604 for hint in iter( 3605 lambda: self._parse_csv( 3606 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3607 ), 3608 [], 3609 ): 3610 hints.extend(hint) 3611 except ParseError: 3612 should_fallback_to_string = True 3613 3614 if should_fallback_to_string or self._curr: 3615 self._retreat(start_index) 3616 return self._parse_hint_fallback_to_string() 3617 3618 return self.expression(exp.Hint, expressions=hints) 3619 3620 def _parse_hint(self) -> t.Optional[exp.Hint]: 3621 if self._match(TokenType.HINT) and self._prev_comments: 3622 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3623 3624 return None 3625 3626 def _parse_into(self) -> t.Optional[exp.Into]: 3627 if not self._match(TokenType.INTO): 3628 return None 3629 3630 temp = self._match(TokenType.TEMPORARY) 3631 unlogged = self._match_text_seq("UNLOGGED") 3632 self._match(TokenType.TABLE) 3633 3634 return self.expression( 3635 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3636 ) 3637 3638 def _parse_from( 3639 self, 3640 joins: bool = False, 3641 skip_from_token: bool = False, 3642 consume_pipe: bool = False, 3643 ) -> t.Optional[exp.From]: 3644 if not skip_from_token and not self._match(TokenType.FROM): 3645 return None 3646 3647 return self.expression( 3648 exp.From, 3649 comments=self._prev_comments, 3650 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3651 ) 3652 3653 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3654 return self.expression( 3655 exp.MatchRecognizeMeasure, 3656 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3657 this=self._parse_expression(), 3658 ) 3659 3660 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3661 if not 
self._match(TokenType.MATCH_RECOGNIZE): 3662 return None 3663 3664 self._match_l_paren() 3665 3666 partition = self._parse_partition_by() 3667 order = self._parse_order() 3668 3669 measures = ( 3670 self._parse_csv(self._parse_match_recognize_measure) 3671 if self._match_text_seq("MEASURES") 3672 else None 3673 ) 3674 3675 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3676 rows = exp.var("ONE ROW PER MATCH") 3677 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3678 text = "ALL ROWS PER MATCH" 3679 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3680 text += " SHOW EMPTY MATCHES" 3681 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3682 text += " OMIT EMPTY MATCHES" 3683 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3684 text += " WITH UNMATCHED ROWS" 3685 rows = exp.var(text) 3686 else: 3687 rows = None 3688 3689 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3690 text = "AFTER MATCH SKIP" 3691 if self._match_text_seq("PAST", "LAST", "ROW"): 3692 text += " PAST LAST ROW" 3693 elif self._match_text_seq("TO", "NEXT", "ROW"): 3694 text += " TO NEXT ROW" 3695 elif self._match_text_seq("TO", "FIRST"): 3696 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3697 elif self._match_text_seq("TO", "LAST"): 3698 text += f" TO LAST {self._advance_any().text}" # type: ignore 3699 after = exp.var(text) 3700 else: 3701 after = None 3702 3703 if self._match_text_seq("PATTERN"): 3704 self._match_l_paren() 3705 3706 if not self._curr: 3707 self.raise_error("Expecting )", self._curr) 3708 3709 paren = 1 3710 start = self._curr 3711 3712 while self._curr and paren > 0: 3713 if self._curr.token_type == TokenType.L_PAREN: 3714 paren += 1 3715 if self._curr.token_type == TokenType.R_PAREN: 3716 paren -= 1 3717 3718 end = self._prev 3719 self._advance() 3720 3721 if paren > 0: 3722 self.raise_error("Expecting )", self._curr) 3723 3724 pattern = exp.var(self._find_sql(start, end)) 3725 else: 3726 pattern = None 3727 3728 define = ( 3729 self._parse_csv(self._parse_name_as_expression) 3730 if self._match_text_seq("DEFINE") 3731 else None 3732 ) 3733 3734 self._match_r_paren() 3735 3736 return self.expression( 3737 exp.MatchRecognize, 3738 partition_by=partition, 3739 order=order, 3740 measures=measures, 3741 rows=rows, 3742 after=after, 3743 pattern=pattern, 3744 define=define, 3745 alias=self._parse_table_alias(), 3746 ) 3747 3748 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3749 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3750 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3751 cross_apply = False 3752 3753 if cross_apply is not None: 3754 this = self._parse_select(table=True) 3755 view = None 3756 outer = None 3757 elif self._match(TokenType.LATERAL): 3758 this = self._parse_select(table=True) 3759 view = self._match(TokenType.VIEW) 3760 outer = self._match(TokenType.OUTER) 3761 else: 3762 return None 3763 3764 if not this: 3765 this = ( 3766 self._parse_unnest() 3767 or self._parse_function() 3768 or self._parse_id_var(any_token=False) 3769 ) 3770 3771 while self._match(TokenType.DOT): 3772 this = exp.Dot( 3773 this=this, 3774 expression=self._parse_function() or self._parse_id_var(any_token=False), 3775 ) 3776 3777 ordinality: t.Optional[bool] = None 3778 3779 if view: 3780 table = self._parse_id_var(any_token=False) 3781 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3782 table_alias: t.Optional[exp.TableAlias] = self.expression( 3783 exp.TableAlias, 
this=table, columns=columns 3784 ) 3785 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3786 # We move the alias from the lateral's child node to the lateral itself 3787 table_alias = this.args["alias"].pop() 3788 else: 3789 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3790 table_alias = self._parse_table_alias() 3791 3792 return self.expression( 3793 exp.Lateral, 3794 this=this, 3795 view=view, 3796 outer=outer, 3797 alias=table_alias, 3798 cross_apply=cross_apply, 3799 ordinality=ordinality, 3800 ) 3801 3802 def _parse_join_parts( 3803 self, 3804 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3805 return ( 3806 self._match_set(self.JOIN_METHODS) and self._prev, 3807 self._match_set(self.JOIN_SIDES) and self._prev, 3808 self._match_set(self.JOIN_KINDS) and self._prev, 3809 ) 3810 3811 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3812 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3813 this = self._parse_column() 3814 if isinstance(this, exp.Column): 3815 return this.this 3816 return this 3817 3818 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3819 3820 def _parse_join( 3821 self, skip_join_token: bool = False, parse_bracket: bool = False 3822 ) -> t.Optional[exp.Join]: 3823 if self._match(TokenType.COMMA): 3824 table = self._try_parse(self._parse_table) 3825 cross_join = self.expression(exp.Join, this=table) if table else None 3826 3827 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3828 cross_join.set("kind", "CROSS") 3829 3830 return cross_join 3831 3832 index = self._index 3833 method, side, kind = self._parse_join_parts() 3834 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3835 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3836 join_comments = self._prev_comments 3837 3838 if not skip_join_token and not join: 3839 self._retreat(index) 3840 kind = None 3841 method = None 3842 side = None 3843 3844 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3845 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3846 3847 if not skip_join_token and not join and not outer_apply and not cross_apply: 3848 return None 3849 3850 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3851 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3852 kwargs["expressions"] = self._parse_csv( 3853 lambda: self._parse_table(parse_bracket=parse_bracket) 3854 ) 3855 3856 if method: 3857 kwargs["method"] = method.text 3858 if side: 3859 kwargs["side"] = side.text 3860 if kind: 3861 kwargs["kind"] = kind.text 3862 if hint: 3863 kwargs["hint"] = hint 3864 3865 if self._match(TokenType.MATCH_CONDITION): 3866 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3867 3868 if self._match(TokenType.ON): 3869 kwargs["on"] = self._parse_assignment() 3870 elif self._match(TokenType.USING): 3871 kwargs["using"] = self._parse_using_identifiers() 3872 elif ( 3873 not method 3874 and not (outer_apply or cross_apply) 3875 and not isinstance(kwargs["this"], exp.Unnest) 3876 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3877 ): 3878 index = self._index 3879 joins: t.Optional[list] = list(self._parse_joins()) 3880 3881 if joins and self._match(TokenType.ON): 3882 kwargs["on"] = self._parse_assignment() 3883 elif joins and self._match(TokenType.USING): 3884 kwargs["using"] = 
self._parse_using_identifiers() 3885 else: 3886 joins = None 3887 self._retreat(index) 3888 3889 kwargs["this"].set("joins", joins if joins else None) 3890 3891 kwargs["pivots"] = self._parse_pivots() 3892 3893 comments = [c for token in (method, side, kind) if token for c in token.comments] 3894 comments = (join_comments or []) + comments 3895 3896 if ( 3897 self.ADD_JOIN_ON_TRUE 3898 and not kwargs.get("on") 3899 and not kwargs.get("using") 3900 and not kwargs.get("method") 3901 and kwargs.get("kind") in (None, "INNER", "OUTER") 3902 ): 3903 kwargs["on"] = exp.true() 3904 3905 return self.expression(exp.Join, comments=comments, **kwargs) 3906 3907 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3908 this = self._parse_assignment() 3909 3910 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3911 return this 3912 3913 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3914 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3915 3916 return this 3917 3918 def _parse_index_params(self) -> exp.IndexParameters: 3919 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3920 3921 if self._match(TokenType.L_PAREN, advance=False): 3922 columns = self._parse_wrapped_csv(self._parse_with_operator) 3923 else: 3924 columns = None 3925 3926 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3927 partition_by = self._parse_partition_by() 3928 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3929 tablespace = ( 3930 self._parse_var(any_token=True) 3931 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3932 else None 3933 ) 3934 where = self._parse_where() 3935 3936 on = self._parse_field() if self._match(TokenType.ON) else None 3937 3938 return self.expression( 3939 exp.IndexParameters, 3940 using=using, 3941 columns=columns, 3942 include=include, 3943 partition_by=partition_by, 3944 where=where, 3945 with_storage=with_storage, 3946 tablespace=tablespace, 3947 on=on, 3948 ) 3949 3950 def _parse_index( 3951 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3952 ) -> t.Optional[exp.Index]: 3953 if index or anonymous: 3954 unique = None 3955 primary = None 3956 amp = None 3957 3958 self._match(TokenType.ON) 3959 self._match(TokenType.TABLE) # hive 3960 table = self._parse_table_parts(schema=True) 3961 else: 3962 unique = self._match(TokenType.UNIQUE) 3963 primary = self._match_text_seq("PRIMARY") 3964 amp = self._match_text_seq("AMP") 3965 3966 if not self._match(TokenType.INDEX): 3967 return None 3968 3969 index = self._parse_id_var() 3970 table = None 3971 3972 params = self._parse_index_params() 3973 3974 return self.expression( 3975 exp.Index, 3976 this=index, 3977 table=table, 3978 unique=unique, 3979 primary=primary, 3980 amp=amp, 3981 params=params, 3982 ) 3983 3984 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3985 hints: t.List[exp.Expression] = [] 3986 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3987 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3988 hints.append( 3989 self.expression( 3990 exp.WithTableHint, 3991 expressions=self._parse_csv( 3992 lambda: self._parse_function() or self._parse_var(any_token=True) 3993 ), 3994 ) 3995 ) 3996 self._match_r_paren() 3997 else: 3998 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3999 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4000 hint = 
exp.IndexTableHint(this=self._prev.text.upper()) 4001 4002 self._match_set((TokenType.INDEX, TokenType.KEY)) 4003 if self._match(TokenType.FOR): 4004 hint.set("target", self._advance_any() and self._prev.text.upper()) 4005 4006 hint.set("expressions", self._parse_wrapped_id_vars()) 4007 hints.append(hint) 4008 4009 return hints or None 4010 4011 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4012 return ( 4013 (not schema and self._parse_function(optional_parens=False)) 4014 or self._parse_id_var(any_token=False) 4015 or self._parse_string_as_identifier() 4016 or self._parse_placeholder() 4017 ) 4018 4019 def _parse_table_parts( 4020 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4021 ) -> exp.Table: 4022 catalog = None 4023 db = None 4024 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4025 4026 while self._match(TokenType.DOT): 4027 if catalog: 4028 # This allows nesting the table in arbitrarily many dot expressions if needed 4029 table = self.expression( 4030 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4031 ) 4032 else: 4033 catalog = db 4034 db = table 4035 # "" used for tsql FROM a..b case 4036 table = self._parse_table_part(schema=schema) or "" 4037 4038 if ( 4039 wildcard 4040 and self._is_connected() 4041 and (isinstance(table, exp.Identifier) or not table) 4042 and self._match(TokenType.STAR) 4043 ): 4044 if isinstance(table, exp.Identifier): 4045 table.args["this"] += "*" 4046 else: 4047 table = exp.Identifier(this="*") 4048 4049 # We bubble up comments from the Identifier to the Table 4050 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4051 4052 if is_db_reference: 4053 catalog = db 4054 db = table 4055 table = None 4056 4057 if not table and not is_db_reference: 4058 self.raise_error(f"Expected table name but got {self._curr}") 4059 if not db and is_db_reference: 4060 self.raise_error(f"Expected database name but got {self._curr}") 4061 4062 table = self.expression( 4063 exp.Table, 4064 comments=comments, 4065 this=table, 4066 db=db, 4067 catalog=catalog, 4068 ) 4069 4070 changes = self._parse_changes() 4071 if changes: 4072 table.set("changes", changes) 4073 4074 at_before = self._parse_historical_data() 4075 if at_before: 4076 table.set("when", at_before) 4077 4078 pivots = self._parse_pivots() 4079 if pivots: 4080 table.set("pivots", pivots) 4081 4082 return table 4083 4084 def _parse_table( 4085 self, 4086 schema: bool = False, 4087 joins: bool = False, 4088 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4089 parse_bracket: bool = False, 4090 is_db_reference: bool = False, 4091 parse_partition: bool = False, 4092 consume_pipe: bool = False, 4093 ) -> t.Optional[exp.Expression]: 4094 lateral = self._parse_lateral() 4095 if lateral: 4096 return lateral 4097 4098 unnest = self._parse_unnest() 4099 if unnest: 4100 return unnest 4101 4102 values = self._parse_derived_table_values() 4103 if values: 4104 return values 4105 4106 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4107 if subquery: 4108 if not subquery.args.get("pivots"): 4109 subquery.set("pivots", self._parse_pivots()) 4110 return subquery 4111 4112 bracket = parse_bracket and self._parse_bracket(None) 4113 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4114 4115 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4116 self._parse_table 4117 ) 4118 rows_from = 
self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4119 4120 only = self._match(TokenType.ONLY) 4121 4122 this = t.cast( 4123 exp.Expression, 4124 bracket 4125 or rows_from 4126 or self._parse_bracket( 4127 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4128 ), 4129 ) 4130 4131 if only: 4132 this.set("only", only) 4133 4134 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4135 self._match_text_seq("*") 4136 4137 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4138 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4139 this.set("partition", self._parse_partition()) 4140 4141 if schema: 4142 return self._parse_schema(this=this) 4143 4144 version = self._parse_version() 4145 4146 if version: 4147 this.set("version", version) 4148 4149 if self.dialect.ALIAS_POST_TABLESAMPLE: 4150 this.set("sample", self._parse_table_sample()) 4151 4152 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4153 if alias: 4154 this.set("alias", alias) 4155 4156 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4157 return self.expression( 4158 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4159 ) 4160 4161 this.set("hints", self._parse_table_hints()) 4162 4163 if not this.args.get("pivots"): 4164 this.set("pivots", self._parse_pivots()) 4165 4166 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4167 this.set("sample", self._parse_table_sample()) 4168 4169 if joins: 4170 for join in self._parse_joins(): 4171 this.append("joins", join) 4172 4173 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4174 this.set("ordinality", True) 4175 this.set("alias", self._parse_table_alias()) 4176 4177 return this 4178 4179 def _parse_version(self) -> t.Optional[exp.Version]: 4180 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4181 this = "TIMESTAMP" 4182 elif self._match(TokenType.VERSION_SNAPSHOT): 4183 this = "VERSION" 4184 else: 4185 return None 4186 4187 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4188 kind = self._prev.text.upper() 4189 start = self._parse_bitwise() 4190 self._match_texts(("TO", "AND")) 4191 end = self._parse_bitwise() 4192 expression: t.Optional[exp.Expression] = self.expression( 4193 exp.Tuple, expressions=[start, end] 4194 ) 4195 elif self._match_text_seq("CONTAINED", "IN"): 4196 kind = "CONTAINED IN" 4197 expression = self.expression( 4198 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4199 ) 4200 elif self._match(TokenType.ALL): 4201 kind = "ALL" 4202 expression = None 4203 else: 4204 self._match_text_seq("AS", "OF") 4205 kind = "AS OF" 4206 expression = self._parse_type() 4207 4208 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4209 4210 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4211 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4212 index = self._index 4213 historical_data = None 4214 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4215 this = self._prev.text.upper() 4216 kind = ( 4217 self._match(TokenType.L_PAREN) 4218 and self._match_texts(self.HISTORICAL_DATA_KIND) 4219 and self._prev.text.upper() 4220 ) 4221 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4222 4223 if expression: 4224 self._match_r_paren() 4225 historical_data = self.expression( 4226 exp.HistoricalData, this=this, kind=kind, expression=expression 4227 ) 4228 else: 4229 
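# The AT / BEFORE-style prefix matched but no "=>" expression followed, so
# rewind. (Sketch of the Snowflake form handled above:
# SELECT * FROM t AT(TIMESTAMP => '2024-01-01'::TIMESTAMP))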
self._retreat(index) 4230 4231 return historical_data 4232 4233 def _parse_changes(self) -> t.Optional[exp.Changes]: 4234 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4235 return None 4236 4237 information = self._parse_var(any_token=True) 4238 self._match_r_paren() 4239 4240 return self.expression( 4241 exp.Changes, 4242 information=information, 4243 at_before=self._parse_historical_data(), 4244 end=self._parse_historical_data(), 4245 ) 4246 4247 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4248 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4249 return None 4250 4251 self._advance() 4252 4253 expressions = self._parse_wrapped_csv(self._parse_equality) 4254 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4255 4256 alias = self._parse_table_alias() if with_alias else None 4257 4258 if alias: 4259 if self.dialect.UNNEST_COLUMN_ONLY: 4260 if alias.args.get("columns"): 4261 self.raise_error("Unexpected extra column alias in unnest.") 4262 4263 alias.set("columns", [alias.this]) 4264 alias.set("this", None) 4265 4266 columns = alias.args.get("columns") or [] 4267 if offset and len(expressions) < len(columns): 4268 offset = columns.pop() 4269 4270 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4271 self._match(TokenType.ALIAS) 4272 offset = self._parse_id_var( 4273 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4274 ) or exp.to_identifier("offset") 4275 4276 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4277 4278 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4279 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4280 if not is_derived and not ( 4281 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4282 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4283 ): 4284 return None 4285 4286 expressions = self._parse_csv(self._parse_value) 4287 alias = self._parse_table_alias() 4288 4289 if is_derived: 4290 self._match_r_paren() 4291 4292 return self.expression( 4293 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4294 ) 4295 4296 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4297 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4298 as_modifier and self._match_text_seq("USING", "SAMPLE") 4299 ): 4300 return None 4301 4302 bucket_numerator = None 4303 bucket_denominator = None 4304 bucket_field = None 4305 percent = None 4306 size = None 4307 seed = None 4308 4309 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4310 matched_l_paren = self._match(TokenType.L_PAREN) 4311 4312 if self.TABLESAMPLE_CSV: 4313 num = None 4314 expressions = self._parse_csv(self._parse_primary) 4315 else: 4316 expressions = None 4317 num = ( 4318 self._parse_factor() 4319 if self._match(TokenType.NUMBER, advance=False) 4320 else self._parse_primary() or self._parse_placeholder() 4321 ) 4322 4323 if self._match_text_seq("BUCKET"): 4324 bucket_numerator = self._parse_number() 4325 self._match_text_seq("OUT", "OF") 4326 bucket_denominator = self._parse_number() 4327 self._match(TokenType.ON) 4328 bucket_field = self._parse_field() 4329 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4330 percent = num 4331 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4332 size = num 4333 else: 4334 percent = num 4335 4336 if matched_l_paren: 4337
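# Close the sample's argument list. E.g. (sketch) in
#   SELECT * FROM t TABLESAMPLE BERNOULLI (10 PERCENT)
# BERNOULLI is parsed as the method, 10 as the percent, and the ")" is
# consumed here.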
self._match_r_paren() 4338 4339 if self._match(TokenType.L_PAREN): 4340 method = self._parse_var(upper=True) 4341 seed = self._match(TokenType.COMMA) and self._parse_number() 4342 self._match_r_paren() 4343 elif self._match_texts(("SEED", "REPEATABLE")): 4344 seed = self._parse_wrapped(self._parse_number) 4345 4346 if not method and self.DEFAULT_SAMPLING_METHOD: 4347 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4348 4349 return self.expression( 4350 exp.TableSample, 4351 expressions=expressions, 4352 method=method, 4353 bucket_numerator=bucket_numerator, 4354 bucket_denominator=bucket_denominator, 4355 bucket_field=bucket_field, 4356 percent=percent, 4357 size=size, 4358 seed=seed, 4359 ) 4360 4361 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4362 return list(iter(self._parse_pivot, None)) or None 4363 4364 def _parse_joins(self) -> t.Iterator[exp.Join]: 4365 return iter(self._parse_join, None) 4366 4367 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4368 if not self._match(TokenType.INTO): 4369 return None 4370 4371 return self.expression( 4372 exp.UnpivotColumns, 4373 this=self._match_text_seq("NAME") and self._parse_column(), 4374 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4375 ) 4376 4377 # https://duckdb.org/docs/sql/statements/pivot 4378 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4379 def _parse_on() -> t.Optional[exp.Expression]: 4380 this = self._parse_bitwise() 4381 4382 if self._match(TokenType.IN): 4383 # PIVOT ... ON col IN (row_val1, row_val2) 4384 return self._parse_in(this) 4385 if self._match(TokenType.ALIAS, advance=False): 4386 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4387 return self._parse_alias(this) 4388 4389 return this 4390 4391 this = self._parse_table() 4392 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4393 into = self._parse_unpivot_columns() 4394 using = self._match(TokenType.USING) and self._parse_csv( 4395 lambda: self._parse_alias(self._parse_function()) 4396 ) 4397 group = self._parse_group() 4398 4399 return self.expression( 4400 exp.Pivot, 4401 this=this, 4402 expressions=expressions, 4403 using=using, 4404 group=group, 4405 unpivot=is_unpivot, 4406 into=into, 4407 ) 4408 4409 def _parse_pivot_in(self) -> exp.In: 4410 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4411 this = self._parse_select_or_expression() 4412 4413 self._match(TokenType.ALIAS) 4414 alias = self._parse_bitwise() 4415 if alias: 4416 if isinstance(alias, exp.Column) and not alias.db: 4417 alias = alias.this 4418 return self.expression(exp.PivotAlias, this=this, alias=alias) 4419 4420 return this 4421 4422 value = self._parse_column() 4423 4424 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4425 self.raise_error("Expecting IN (") 4426 4427 if self._match(TokenType.ANY): 4428 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4429 else: 4430 exprs = self._parse_csv(_parse_aliased_expression) 4431 4432 self._match_r_paren() 4433 return self.expression(exp.In, this=value, expressions=exprs) 4434 4435 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4436 func = self._parse_function() 4437 if not func: 4438 if self._prev and self._prev.token_type == TokenType.COMMA: 4439 return None 4440 self.raise_error("Expecting an aggregation function in PIVOT") 4441 4442 return self._parse_alias(func) 4443 4444 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4445 index = 
self._index 4446 include_nulls = None 4447 4448 if self._match(TokenType.PIVOT): 4449 unpivot = False 4450 elif self._match(TokenType.UNPIVOT): 4451 unpivot = True 4452 4453 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4454 if self._match_text_seq("INCLUDE", "NULLS"): 4455 include_nulls = True 4456 elif self._match_text_seq("EXCLUDE", "NULLS"): 4457 include_nulls = False 4458 else: 4459 return None 4460 4461 expressions = [] 4462 4463 if not self._match(TokenType.L_PAREN): 4464 self._retreat(index) 4465 return None 4466 4467 if unpivot: 4468 expressions = self._parse_csv(self._parse_column) 4469 else: 4470 expressions = self._parse_csv(self._parse_pivot_aggregation) 4471 4472 if not expressions: 4473 self.raise_error("Failed to parse PIVOT's aggregation list") 4474 4475 if not self._match(TokenType.FOR): 4476 self.raise_error("Expecting FOR") 4477 4478 fields = [] 4479 while True: 4480 field = self._try_parse(self._parse_pivot_in) 4481 if not field: 4482 break 4483 fields.append(field) 4484 4485 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4486 self._parse_bitwise 4487 ) 4488 4489 group = self._parse_group() 4490 4491 self._match_r_paren() 4492 4493 pivot = self.expression( 4494 exp.Pivot, 4495 expressions=expressions, 4496 fields=fields, 4497 unpivot=unpivot, 4498 include_nulls=include_nulls, 4499 default_on_null=default_on_null, 4500 group=group, 4501 ) 4502 4503 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4504 pivot.set("alias", self._parse_table_alias()) 4505 4506 if not unpivot: 4507 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4508 4509 columns: t.List[exp.Expression] = [] 4510 all_fields = [] 4511 for pivot_field in pivot.fields: 4512 pivot_field_expressions = pivot_field.expressions 4513 4514 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4515 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4516 continue 4517 4518 all_fields.append( 4519 [ 4520 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4521 for fld in pivot_field_expressions 4522 ] 4523 ) 4524 4525 if all_fields: 4526 if names: 4527 all_fields.append(names) 4528 4529 # Generate all possible combinations of the pivot columns 4530 # e.g PIVOT(sum(...) 
as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4531 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4532 for fld_parts_tuple in itertools.product(*all_fields): 4533 fld_parts = list(fld_parts_tuple) 4534 4535 if names and self.PREFIXED_PIVOT_COLUMNS: 4536 # Move the "name" to the front of the list 4537 fld_parts.insert(0, fld_parts.pop(-1)) 4538 4539 columns.append(exp.to_identifier("_".join(fld_parts))) 4540 4541 pivot.set("columns", columns) 4542 4543 return pivot 4544 4545 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4546 return [agg.alias for agg in aggregations if agg.alias] 4547 4548 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4549 if not skip_where_token and not self._match(TokenType.PREWHERE): 4550 return None 4551 4552 return self.expression( 4553 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4554 ) 4555 4556 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4557 if not skip_where_token and not self._match(TokenType.WHERE): 4558 return None 4559 4560 return self.expression( 4561 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4562 ) 4563 4564 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4565 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4566 return None 4567 comments = self._prev_comments 4568 4569 elements: t.Dict[str, t.Any] = defaultdict(list) 4570 4571 if self._match(TokenType.ALL): 4572 elements["all"] = True 4573 elif self._match(TokenType.DISTINCT): 4574 elements["all"] = False 4575 4576 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4577 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4578 4579 while True: 4580 index = self._index 4581 4582 elements["expressions"].extend( 4583 self._parse_csv( 4584 lambda: None 4585 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4586 else self._parse_assignment() 4587 ) 4588 ) 4589 4590 before_with_index = self._index 4591 with_prefix = self._match(TokenType.WITH) 4592 4593 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 4594 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 4595 elements[key].append(cube_or_rollup) 4596 elif grouping_sets := self._parse_grouping_sets(): 4597 elements["grouping_sets"].append(grouping_sets) 4598 elif self._match_text_seq("TOTALS"): 4599 elements["totals"] = True # type: ignore 4600 4601 if before_with_index <= self._index <= before_with_index + 1: 4602 self._retreat(before_with_index) 4603 break 4604 4605 if index == self._index: 4606 break 4607 4608 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4609 4610 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: 4611 if self._match(TokenType.CUBE): 4612 kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube 4613 elif self._match(TokenType.ROLLUP): 4614 kind = exp.Rollup 4615 else: 4616 return None 4617 4618 return self.expression( 4619 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4620 ) 4621 4622 def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]: 4623 if self._match(TokenType.GROUPING_SETS): 4624 return self.expression( 4625 exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set) 4626 ) 4627 return None 4628 4629 def _parse_grouping_set(self) 
-> t.Optional[exp.Expression]: 4630 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 4631 4632 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4633 if not skip_having_token and not self._match(TokenType.HAVING): 4634 return None 4635 return self.expression( 4636 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4637 ) 4638 4639 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4640 if not self._match(TokenType.QUALIFY): 4641 return None 4642 return self.expression(exp.Qualify, this=self._parse_assignment()) 4643 4644 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4645 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4646 exp.Prior, this=self._parse_bitwise() 4647 ) 4648 connect = self._parse_assignment() 4649 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4650 return connect 4651 4652 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4653 if skip_start_token: 4654 start = None 4655 elif self._match(TokenType.START_WITH): 4656 start = self._parse_assignment() 4657 else: 4658 return None 4659 4660 self._match(TokenType.CONNECT_BY) 4661 nocycle = self._match_text_seq("NOCYCLE") 4662 connect = self._parse_connect_with_prior() 4663 4664 if not start and self._match(TokenType.START_WITH): 4665 start = self._parse_assignment() 4666 4667 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4668 4669 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4670 this = self._parse_id_var(any_token=True) 4671 if self._match(TokenType.ALIAS): 4672 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4673 return this 4674 4675 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4676 if self._match_text_seq("INTERPOLATE"): 4677 return self._parse_wrapped_csv(self._parse_name_as_expression) 4678 return None 4679 4680 def _parse_order( 4681 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4682 ) -> t.Optional[exp.Expression]: 4683 siblings = None 4684 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4685 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4686 return this 4687 4688 siblings = True 4689 4690 return self.expression( 4691 exp.Order, 4692 comments=self._prev_comments, 4693 this=this, 4694 expressions=self._parse_csv(self._parse_ordered), 4695 siblings=siblings, 4696 ) 4697 4698 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4699 if not self._match(token): 4700 return None 4701 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4702 4703 def _parse_ordered( 4704 self, parse_method: t.Optional[t.Callable] = None 4705 ) -> t.Optional[exp.Ordered]: 4706 this = parse_method() if parse_method else self._parse_assignment() 4707 if not this: 4708 return None 4709 4710 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4711 this = exp.var("ALL") 4712 4713 asc = self._match(TokenType.ASC) 4714 desc = self._match(TokenType.DESC) or (asc and False) 4715 4716 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4717 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4718 4719 nulls_first = is_nulls_first or False 4720 explicitly_null_ordered = is_nulls_first or is_nulls_last 4721 4722 if ( 4723 not explicitly_null_ordered 4724 and ( 4725 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4726 or (desc and 
self.dialect.NULL_ORDERING != "nulls_are_small") 4727 ) 4728 and self.dialect.NULL_ORDERING != "nulls_are_last" 4729 ): 4730 nulls_first = True 4731 4732 if self._match_text_seq("WITH", "FILL"): 4733 with_fill = self.expression( 4734 exp.WithFill, 4735 **{ # type: ignore 4736 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4737 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4738 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4739 "interpolate": self._parse_interpolate(), 4740 }, 4741 ) 4742 else: 4743 with_fill = None 4744 4745 return self.expression( 4746 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4747 ) 4748 4749 def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]: 4750 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 4751 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4752 self._match_text_seq("ONLY") 4753 with_ties = self._match_text_seq("WITH", "TIES") 4754 4755 if not (percent or rows or with_ties): 4756 return None 4757 4758 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4759 4760 def _parse_limit( 4761 self, 4762 this: t.Optional[exp.Expression] = None, 4763 top: bool = False, 4764 skip_limit_token: bool = False, 4765 ) -> t.Optional[exp.Expression]: 4766 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4767 comments = self._prev_comments 4768 if top: 4769 limit_paren = self._match(TokenType.L_PAREN) 4770 expression = self._parse_term() if limit_paren else self._parse_number() 4771 4772 if limit_paren: 4773 self._match_r_paren() 4774 4775 else: 4776 # Parsing LIMIT x% (i.e. x PERCENT) as a term leads to an error, since 4777 # we try to build an exp.Mod expr.
To work around this, we backtrack and instead 4778 # consume the factor and parse the percentage separately 4779 expression = self._try_parse(self._parse_term) or self._parse_factor() 4780 4781 limit_options = self._parse_limit_options() 4782 4783 if self._match(TokenType.COMMA): 4784 offset = expression 4785 expression = self._parse_term() 4786 else: 4787 offset = None 4788 4789 limit_exp = self.expression( 4790 exp.Limit, 4791 this=this, 4792 expression=expression, 4793 offset=offset, 4794 comments=comments, 4795 limit_options=limit_options, 4796 expressions=self._parse_limit_by(), 4797 ) 4798 4799 return limit_exp 4800 4801 if self._match(TokenType.FETCH): 4802 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4803 direction = self._prev.text.upper() if direction else "FIRST" 4804 4805 count = self._parse_field(tokens=self.FETCH_TOKENS) 4806 4807 return self.expression( 4808 exp.Fetch, 4809 direction=direction, 4810 count=count, 4811 limit_options=self._parse_limit_options(), 4812 ) 4813 4814 return this 4815 4816 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4817 if not self._match(TokenType.OFFSET): 4818 return this 4819 4820 count = self._parse_term() 4821 self._match_set((TokenType.ROW, TokenType.ROWS)) 4822 4823 return self.expression( 4824 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4825 ) 4826 4827 def _can_parse_limit_or_offset(self) -> bool: 4828 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4829 return False 4830 4831 index = self._index 4832 result = bool( 4833 self._try_parse(self._parse_limit, retreat=True) 4834 or self._try_parse(self._parse_offset, retreat=True) 4835 ) 4836 self._retreat(index) 4837 return result 4838 4839 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4840 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4841 4842 def _parse_locks(self) -> t.List[exp.Lock]: 4843 locks = [] 4844 while True: 4845 update, key = None, None 4846 if self._match_text_seq("FOR", "UPDATE"): 4847 update = True 4848 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4849 "LOCK", "IN", "SHARE", "MODE" 4850 ): 4851 update = False 4852 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4853 update, key = False, True 4854 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4855 update, key = True, True 4856 else: 4857 break 4858 4859 expressions = None 4860 if self._match_text_seq("OF"): 4861 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4862 4863 wait: t.Optional[bool | exp.Expression] = None 4864 if self._match_text_seq("NOWAIT"): 4865 wait = True 4866 elif self._match_text_seq("WAIT"): 4867 wait = self._parse_primary() 4868 elif self._match_text_seq("SKIP", "LOCKED"): 4869 wait = False 4870 4871 locks.append( 4872 self.expression( 4873 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4874 ) 4875 ) 4876 4877 return locks 4878 4879 def parse_set_operation( 4880 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4881 ) -> t.Optional[exp.Expression]: 4882 start = self._index 4883 _, side_token, kind_token = self._parse_join_parts() 4884 4885 side = side_token.text if side_token else None 4886 kind = kind_token.text if kind_token else None 4887 4888 if not self._match_set(self.SET_OPERATIONS): 4889 self._retreat(start) 4890 return None 4891 4892 token_type = self._prev.token_type 4893 4894 if token_type == TokenType.UNION: 4895 operation:
t.Type[exp.SetOperation] = exp.Union 4896 elif token_type == TokenType.EXCEPT: 4897 operation = exp.Except 4898 else: 4899 operation = exp.Intersect 4900 4901 comments = self._prev.comments 4902 4903 if self._match(TokenType.DISTINCT): 4904 distinct: t.Optional[bool] = True 4905 elif self._match(TokenType.ALL): 4906 distinct = False 4907 else: 4908 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4909 if distinct is None: 4910 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4911 4912 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4913 "STRICT", "CORRESPONDING" 4914 ) 4915 if self._match_text_seq("CORRESPONDING"): 4916 by_name = True 4917 if not side and not kind: 4918 kind = "INNER" 4919 4920 on_column_list = None 4921 if by_name and self._match_texts(("ON", "BY")): 4922 on_column_list = self._parse_wrapped_csv(self._parse_column) 4923 4924 expression = self._parse_select( 4925 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4926 ) 4927 4928 return self.expression( 4929 operation, 4930 comments=comments, 4931 this=this, 4932 distinct=distinct, 4933 by_name=by_name, 4934 expression=expression, 4935 side=side, 4936 kind=kind, 4937 on=on_column_list, 4938 ) 4939 4940 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4941 while this: 4942 setop = self.parse_set_operation(this) 4943 if not setop: 4944 break 4945 this = setop 4946 4947 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4948 expression = this.expression 4949 4950 if expression: 4951 for arg in self.SET_OP_MODIFIERS: 4952 expr = expression.args.get(arg) 4953 if expr: 4954 this.set(arg, expr.pop()) 4955 4956 return this 4957 4958 def _parse_expression(self) -> t.Optional[exp.Expression]: 4959 return self._parse_alias(self._parse_assignment()) 4960 4961 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4962 this = self._parse_disjunction() 4963 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4964 # This allows us to parse <non-identifier token> := <expr> 4965 this = exp.column( 4966 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4967 ) 4968 4969 while self._match_set(self.ASSIGNMENT): 4970 if isinstance(this, exp.Column) and len(this.parts) == 1: 4971 this = this.this 4972 4973 this = self.expression( 4974 self.ASSIGNMENT[self._prev.token_type], 4975 this=this, 4976 comments=self._prev_comments, 4977 expression=self._parse_assignment(), 4978 ) 4979 4980 return this 4981 4982 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4983 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4984 4985 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4986 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4987 4988 def _parse_equality(self) -> t.Optional[exp.Expression]: 4989 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4990 4991 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4992 return self._parse_tokens(self._parse_range, self.COMPARISON) 4993 4994 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4995 this = this or self._parse_bitwise() 4996 negate = self._match(TokenType.NOT) 4997 4998 if self._match_set(self.RANGE_PARSERS): 4999 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5000 if not expression: 5001 return this 5002 5003 this = expression 5004 elif self._match(TokenType.ISNULL): 
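            # Postgres-style shorthand, e.g. (illustrative) "x ISNULL" parses as
            # "x IS NULL"; see the ISNULL/NOTNULL note just below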
5005 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5006 5007 # Postgres supports ISNULL and NOTNULL for conditions. 5008 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5009 if self._match(TokenType.NOTNULL): 5010 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5011 this = self.expression(exp.Not, this=this) 5012 5013 if negate: 5014 this = self._negate_range(this) 5015 5016 if self._match(TokenType.IS): 5017 this = self._parse_is(this) 5018 5019 return this 5020 5021 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5022 if not this: 5023 return this 5024 5025 return self.expression(exp.Not, this=this) 5026 5027 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5028 index = self._index - 1 5029 negate = self._match(TokenType.NOT) 5030 5031 if self._match_text_seq("DISTINCT", "FROM"): 5032 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5033 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5034 5035 if self._match(TokenType.JSON): 5036 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5037 5038 if self._match_text_seq("WITH"): 5039 _with = True 5040 elif self._match_text_seq("WITHOUT"): 5041 _with = False 5042 else: 5043 _with = None 5044 5045 unique = self._match(TokenType.UNIQUE) 5046 self._match_text_seq("KEYS") 5047 expression: t.Optional[exp.Expression] = self.expression( 5048 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5049 ) 5050 else: 5051 expression = self._parse_primary() or self._parse_null() 5052 if not expression: 5053 self._retreat(index) 5054 return None 5055 5056 this = self.expression(exp.Is, this=this, expression=expression) 5057 this = self.expression(exp.Not, this=this) if negate else this 5058 return self._parse_column_ops(this) 5059 5060 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5061 unnest = self._parse_unnest(with_alias=False) 5062 if unnest: 5063 this = self.expression(exp.In, this=this, unnest=unnest) 5064 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5065 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5066 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5067 5068 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5069 this = self.expression( 5070 exp.In, 5071 this=this, 5072 query=self._parse_query_modifiers(query).subquery(copy=False), 5073 ) 5074 else: 5075 this = self.expression(exp.In, this=this, expressions=expressions) 5076 5077 if matched_l_paren: 5078 self._match_r_paren(this) 5079 elif not self._match(TokenType.R_BRACKET, expression=this): 5080 self.raise_error("Expecting ]") 5081 else: 5082 this = self.expression(exp.In, this=this, field=self._parse_column()) 5083 5084 return this 5085 5086 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5087 symmetric = None 5088 if self._match_text_seq("SYMMETRIC"): 5089 symmetric = True 5090 elif self._match_text_seq("ASYMMETRIC"): 5091 symmetric = False 5092 5093 low = self._parse_bitwise() 5094 self._match(TokenType.AND) 5095 high = self._parse_bitwise() 5096 5097 return self.expression( 5098 exp.Between, 5099 this=this, 5100 low=low, 5101 high=high, 5102 symmetric=symmetric, 5103 ) 5104 5105 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5106 if not self._match(TokenType.ESCAPE): 5107 return this 5108 
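        # Illustrative tail: for "x LIKE 'a$%' ESCAPE '$'", the string '$' parsed
        # next becomes the expression arg of the wrapping exp.Escape node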
return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5109 5110 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5111 index = self._index 5112 5113 if not self._match(TokenType.INTERVAL) and match_interval: 5114 return None 5115 5116 if self._match(TokenType.STRING, advance=False): 5117 this = self._parse_primary() 5118 else: 5119 this = self._parse_term() 5120 5121 if not this or ( 5122 isinstance(this, exp.Column) 5123 and not this.table 5124 and not this.this.quoted 5125 and self._curr 5126 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5127 ): 5128 self._retreat(index) 5129 return None 5130 5131 # handle day-time format interval span with omitted units: 5132 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5133 interval_span_units_omitted = None 5134 if ( 5135 this 5136 and this.is_string 5137 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5138 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5139 ): 5140 index = self._index 5141 5142 # Var "TO" Var 5143 first_unit = self._parse_var(any_token=True, upper=True) 5144 second_unit = None 5145 if first_unit and self._match_text_seq("TO"): 5146 second_unit = self._parse_var(any_token=True, upper=True) 5147 5148 interval_span_units_omitted = not (first_unit and second_unit) 5149 5150 self._retreat(index) 5151 5152 unit = ( 5153 None 5154 if interval_span_units_omitted 5155 else ( 5156 self._parse_function() 5157 or ( 5158 not self._match(TokenType.ALIAS, advance=False) 5159 and self._parse_var(any_token=True, upper=True) 5160 ) 5161 ) 5162 ) 5163 5164 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5165 # each INTERVAL expression into this canonical form so it's easy to transpile 5166 if this and this.is_number: 5167 this = exp.Literal.string(this.to_py()) 5168 elif this and this.is_string: 5169 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5170 if parts and unit: 5171 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5172 unit = None 5173 self._retreat(self._index - 1) 5174 5175 if len(parts) == 1: 5176 this = exp.Literal.string(parts[0][0]) 5177 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5178 5179 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5180 unit = self.expression( 5181 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5182 ) 5183 5184 interval = self.expression(exp.Interval, this=this, unit=unit) 5185 5186 index = self._index 5187 self._match(TokenType.PLUS) 5188 5189 # Convert INTERVAL 'val_1' unit_1 [+] ... 
# [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fall back to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
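        # e.g. for "DECIMAL(38, 0)" or BigQuery's inline constructor
        # "STRUCT<a INT, b STRING>(1, 'foo')" (illustrative), the type just
        # parsed lands in data_type and is handled by the branches below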
        # _parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
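            # Condensed restatement (illustrative): "DECIMAL(38, 0)" consumes extra
            # tokens here (index2 - index > 1) and the explicit type is kept, while a
            # bare "DECIMAL" expanded by a TYPE_CONVERTERS callable advances only one
            # token, so we retreat and fall through to the column/identifier fallback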
5334 if data_type.expressions and index2 - index > 1: 5335 self._retreat(index2) 5336 return self._parse_column_ops(data_type) 5337 5338 self._retreat(index) 5339 5340 if fallback_to_identifier: 5341 return self._parse_id_var() 5342 5343 this = self._parse_column() 5344 return this and self._parse_column_ops(this) 5345 5346 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5347 this = self._parse_type() 5348 if not this: 5349 return None 5350 5351 if isinstance(this, exp.Column) and not this.table: 5352 this = exp.var(this.name.upper()) 5353 5354 return self.expression( 5355 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5356 ) 5357 5358 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5359 type_name = identifier.name 5360 5361 while self._match(TokenType.DOT): 5362 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5363 5364 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5365 5366 def _parse_types( 5367 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5368 ) -> t.Optional[exp.Expression]: 5369 index = self._index 5370 5371 this: t.Optional[exp.Expression] = None 5372 prefix = self._match_text_seq("SYSUDTLIB", ".") 5373 5374 if self._match_set(self.TYPE_TOKENS): 5375 type_token = self._prev.token_type 5376 else: 5377 type_token = None 5378 identifier = allow_identifiers and self._parse_id_var( 5379 any_token=False, tokens=(TokenType.VAR,) 5380 ) 5381 if isinstance(identifier, exp.Identifier): 5382 try: 5383 tokens = self.dialect.tokenize(identifier.name) 5384 except TokenError: 5385 tokens = None 5386 5387 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5388 type_token = tokens[0].token_type 5389 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5390 this = self._parse_user_defined_type(identifier) 5391 else: 5392 self._retreat(self._index - 1) 5393 return None 5394 else: 5395 return None 5396 5397 if type_token == TokenType.PSEUDO_TYPE: 5398 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5399 5400 if type_token == TokenType.OBJECT_IDENTIFIER: 5401 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5402 5403 # https://materialize.com/docs/sql/types/map/ 5404 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5405 key_type = self._parse_types( 5406 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5407 ) 5408 if not self._match(TokenType.FARROW): 5409 self._retreat(index) 5410 return None 5411 5412 value_type = self._parse_types( 5413 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5414 ) 5415 if not self._match(TokenType.R_BRACKET): 5416 self._retreat(index) 5417 return None 5418 5419 return exp.DataType( 5420 this=exp.DataType.Type.MAP, 5421 expressions=[key_type, value_type], 5422 nested=True, 5423 prefix=prefix, 5424 ) 5425 5426 nested = type_token in self.NESTED_TYPE_TOKENS 5427 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5428 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5429 expressions = None 5430 maybe_func = False 5431 5432 if self._match(TokenType.L_PAREN): 5433 if is_struct: 5434 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5435 elif nested: 5436 expressions = self._parse_csv( 5437 lambda: self._parse_types( 5438 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5439 ) 5440 ) 5441 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5442 this = expressions[0] 5443 this.set("nullable", True) 5444 self._match_r_paren() 5445 return this 5446 elif type_token in self.ENUM_TYPE_TOKENS: 5447 expressions = self._parse_csv(self._parse_equality) 5448 elif is_aggregate: 5449 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5450 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5451 ) 5452 if not func_or_ident: 5453 return None 5454 expressions = [func_or_ident] 5455 if self._match(TokenType.COMMA): 5456 expressions.extend( 5457 self._parse_csv( 5458 lambda: self._parse_types( 5459 check_func=check_func, 5460 schema=schema, 5461 allow_identifiers=allow_identifiers, 5462 ) 5463 ) 5464 ) 5465 else: 5466 expressions = self._parse_csv(self._parse_type_size) 5467 5468 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5469 if type_token == TokenType.VECTOR and len(expressions) == 2: 5470 expressions = self._parse_vector_expressions(expressions) 5471 5472 if not self._match(TokenType.R_PAREN): 5473 self._retreat(index) 5474 return None 5475 5476 maybe_func = True 5477 5478 values: t.Optional[t.List[exp.Expression]] = None 5479 5480 if nested and self._match(TokenType.LT): 5481 if is_struct: 5482 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5483 else: 5484 expressions = self._parse_csv( 5485 lambda: self._parse_types( 5486 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5487 ) 5488 ) 5489 5490 if not self._match(TokenType.GT): 5491 self.raise_error("Expecting >") 5492 5493 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5494 values = self._parse_csv(self._parse_assignment) 5495 if not values and is_struct: 5496 values = None 5497 self._retreat(self._index - 1) 5498 else: 5499 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5500 5501 if type_token in self.TIMESTAMPS: 5502 if self._match_text_seq("WITH", "TIME", "ZONE"): 5503 maybe_func = False 5504 tz_type = ( 5505 exp.DataType.Type.TIMETZ 5506 if type_token in self.TIMES 5507 else exp.DataType.Type.TIMESTAMPTZ 5508 ) 5509 this = exp.DataType(this=tz_type, expressions=expressions) 5510 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5511 maybe_func = False 5512 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5513 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5514 maybe_func = False 5515 elif type_token == TokenType.INTERVAL: 5516 unit = self._parse_var(upper=True) 5517 if unit: 5518 if self._match_text_seq("TO"): 5519 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5520 5521 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5522 else: 5523 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5524 elif type_token == TokenType.VOID: 5525 this = exp.DataType(this=exp.DataType.Type.NULL) 5526 5527 if maybe_func and check_func: 5528 index2 = self._index 5529 peek = self._parse_string() 5530 5531 if not peek: 5532 self._retreat(index) 5533 return None 5534 5535 self._retreat(index2) 5536 5537 if not this: 5538 if self._match_text_seq("UNSIGNED"): 5539 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5540 if not unsigned_type_token: 5541 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5542 5543 type_token = unsigned_type_token or type_token 5544 5545 # NULLABLE without parentheses can be a column (Presto/Trino) 5546 if type_token == 
TokenType.NULLABLE and not expressions: 5547 self._retreat(index) 5548 return None 5549 5550 this = exp.DataType( 5551 this=exp.DataType.Type[type_token.value], 5552 expressions=expressions, 5553 nested=nested, 5554 prefix=prefix, 5555 ) 5556 5557 # Empty arrays/structs are allowed 5558 if values is not None: 5559 cls = exp.Struct if is_struct else exp.Array 5560 this = exp.cast(cls(expressions=values), this, copy=False) 5561 5562 elif expressions: 5563 this.set("expressions", expressions) 5564 5565 # https://materialize.com/docs/sql/types/list/#type-name 5566 while self._match(TokenType.LIST): 5567 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5568 5569 index = self._index 5570 5571 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5572 matched_array = self._match(TokenType.ARRAY) 5573 5574 while self._curr: 5575 datatype_token = self._prev.token_type 5576 matched_l_bracket = self._match(TokenType.L_BRACKET) 5577 5578 if (not matched_l_bracket and not matched_array) or ( 5579 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5580 ): 5581 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5582 # not to be confused with the fixed size array parsing 5583 break 5584 5585 matched_array = False 5586 values = self._parse_csv(self._parse_assignment) or None 5587 if ( 5588 values 5589 and not schema 5590 and ( 5591 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5592 ) 5593 ): 5594 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5595 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5596 self._retreat(index) 5597 break 5598 5599 this = exp.DataType( 5600 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5601 ) 5602 self._match(TokenType.R_BRACKET) 5603 5604 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5605 converter = self.TYPE_CONVERTERS.get(this.this) 5606 if converter: 5607 this = converter(t.cast(exp.DataType, this)) 5608 5609 return this 5610 5611 def _parse_vector_expressions( 5612 self, expressions: t.List[exp.Expression] 5613 ) -> t.List[exp.Expression]: 5614 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5615 5616 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5617 index = self._index 5618 5619 if ( 5620 self._curr 5621 and self._next 5622 and self._curr.token_type in self.TYPE_TOKENS 5623 and self._next.token_type in self.TYPE_TOKENS 5624 ): 5625 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5626 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5627 this = self._parse_id_var() 5628 else: 5629 this = ( 5630 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5631 or self._parse_id_var() 5632 ) 5633 5634 self._match(TokenType.COLON) 5635 5636 if ( 5637 type_required 5638 and not isinstance(this, exp.DataType) 5639 and not self._match_set(self.TYPE_TOKENS, advance=False) 5640 ): 5641 self._retreat(index) 5642 return self._parse_types() 5643 5644 return self._parse_column_def(this) 5645 5646 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5647 if not self._match_text_seq("AT", "TIME", "ZONE"): 5648 return this 5649 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5650 5651 def _parse_column(self) -> t.Optional[exp.Expression]: 5652 this = self._parse_column_reference() 5653 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5654 5655 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5656 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5657 5658 return column 5659 5660 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5661 this = self._parse_field() 5662 if ( 5663 not this 5664 and self._match(TokenType.VALUES, advance=False) 5665 and self.VALUES_FOLLOWED_BY_PAREN 5666 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5667 ): 5668 this = self._parse_id_var() 5669 5670 if isinstance(this, exp.Identifier): 5671 # We bubble up comments from the Identifier to the Column 5672 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5673 5674 return this 5675 5676 def _parse_colon_as_variant_extract( 5677 self, this: t.Optional[exp.Expression] 5678 ) -> t.Optional[exp.Expression]: 5679 casts = [] 5680 json_path = [] 5681 escape = None 5682 5683 while self._match(TokenType.COLON): 5684 start_index = self._index 5685 5686 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5687 path = self._parse_column_ops( 5688 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5689 ) 5690 5691 # The cast :: operator has a lower precedence than the extraction operator :, so 5692 # we rearrange the AST appropriately to avoid casting the JSON path 5693 while isinstance(path, exp.Cast): 5694 casts.append(path.to) 5695 path = path.this 5696 5697 if casts: 5698 dcolon_offset = next( 5699 i 5700 for i, t in enumerate(self._tokens[start_index:]) 5701 if t.token_type == TokenType.DCOLON 5702 ) 5703 end_token = self._tokens[start_index + dcolon_offset - 1] 5704 else: 5705 end_token = self._prev 5706 5707 if path: 5708 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5709 # it'll roundtrip to a string literal in GET_PATH 5710 if isinstance(path, exp.Identifier) and path.quoted: 5711 escape = True 5712 5713 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5714 5715 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5716 # Databricks transforms it back to the colon/dot notation 5717 if json_path: 5718 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5719 5720 if json_path_expr: 5721 json_path_expr.set("escape", escape) 5722 5723 this = self.expression( 5724 exp.JSONExtract, 5725 this=this, 5726 expression=json_path_expr, 5727 variant_extract=True, 5728 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5729 ) 5730 5731 while casts: 5732 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5733 5734 return this 5735 5736 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5737 return self._parse_types() 5738 5739 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5740 this = self._parse_bracket(this) 5741 5742 while self._match_set(self.COLUMN_OPERATORS): 5743 op_token = self._prev.token_type 5744 op = self.COLUMN_OPERATORS.get(op_token) 5745 5746 if op_token in self.CAST_COLUMN_OPERATORS: 5747 field = self._parse_dcolon() 5748 if not field: 5749 self.raise_error("Expected type") 5750 elif op and self._curr: 5751 field = self._parse_column_reference() or self._parse_bitwise() 5752 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5753 field = self._parse_column_ops(field) 5754 else: 5755 field = self._parse_field(any_token=True, anonymous_func=True) 5756 5757 # Function calls can be qualified, e.g., x.y.FOO() 5758 # This converts the final AST to a series of Dots leading to the function call 5759 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5760 if isinstance(field, (exp.Func, exp.Window)) and this: 5761 this = this.transform( 5762 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5763 ) 5764 5765 if op: 5766 this = op(self, this, field) 5767 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5768 this = self.expression( 5769 exp.Column, 5770 comments=this.comments, 5771 this=field, 5772 table=this.this, 5773 db=this.args.get("table"), 5774 catalog=this.args.get("db"), 5775 ) 5776 elif isinstance(field, exp.Window): 5777 # Move the exp.Dot's to the window's function 5778 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5779 field.set("this", window_func) 5780 this = field 5781 else: 5782 this = self.expression(exp.Dot, this=this, expression=field) 5783 5784 if field and field.comments: 5785 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5786 5787 this = self._parse_bracket(this) 5788 5789 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5790 5791 def _parse_paren(self) -> t.Optional[exp.Expression]: 5792 if not self._match(TokenType.L_PAREN): 5793 return None 5794 5795 comments = self._prev_comments 5796 query = self._parse_select() 5797 5798 if query: 5799 expressions = [query] 5800 else: 5801 expressions = self._parse_expressions() 5802 5803 this = seq_get(expressions, 0) 5804 5805 if not this and self._match(TokenType.R_PAREN, advance=False): 5806 this = self.expression(exp.Tuple) 5807 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5808 this = 
self._parse_subquery(this=this, parse_alias=False) 5809 elif isinstance(this, exp.Subquery): 5810 this = self._parse_subquery( 5811 this=self._parse_query_modifiers(self._parse_set_operations(this)), 5812 parse_alias=False, 5813 ) 5814 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5815 this = self.expression(exp.Tuple, expressions=expressions) 5816 else: 5817 this = self.expression(exp.Paren, this=this) 5818 5819 if this: 5820 this.add_comments(comments) 5821 5822 self._match_r_paren(expression=this) 5823 5824 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5825 return self._parse_window(this) 5826 5827 return this 5828 5829 def _parse_primary(self) -> t.Optional[exp.Expression]: 5830 if self._match_set(self.PRIMARY_PARSERS): 5831 token_type = self._prev.token_type 5832 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5833 5834 if token_type == TokenType.STRING: 5835 expressions = [primary] 5836 while self._match(TokenType.STRING): 5837 expressions.append(exp.Literal.string(self._prev.text)) 5838 5839 if len(expressions) > 1: 5840 return self.expression(exp.Concat, expressions=expressions) 5841 5842 return primary 5843 5844 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5845 return exp.Literal.number(f"0.{self._prev.text}") 5846 5847 return self._parse_paren() 5848 5849 def _parse_field( 5850 self, 5851 any_token: bool = False, 5852 tokens: t.Optional[t.Collection[TokenType]] = None, 5853 anonymous_func: bool = False, 5854 ) -> t.Optional[exp.Expression]: 5855 if anonymous_func: 5856 field = ( 5857 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5858 or self._parse_primary() 5859 ) 5860 else: 5861 field = self._parse_primary() or self._parse_function( 5862 anonymous=anonymous_func, any_token=any_token 5863 ) 5864 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5865 5866 def _parse_function( 5867 self, 5868 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5869 anonymous: bool = False, 5870 optional_parens: bool = True, 5871 any_token: bool = False, 5872 ) -> t.Optional[exp.Expression]: 5873 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5874 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5875 fn_syntax = False 5876 if ( 5877 self._match(TokenType.L_BRACE, advance=False) 5878 and self._next 5879 and self._next.text.upper() == "FN" 5880 ): 5881 self._advance(2) 5882 fn_syntax = True 5883 5884 func = self._parse_function_call( 5885 functions=functions, 5886 anonymous=anonymous, 5887 optional_parens=optional_parens, 5888 any_token=any_token, 5889 ) 5890 5891 if fn_syntax: 5892 self._match(TokenType.R_BRACE) 5893 5894 return func 5895 5896 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5897 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5898 5899 def _parse_function_call( 5900 self, 5901 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5902 anonymous: bool = False, 5903 optional_parens: bool = True, 5904 any_token: bool = False, 5905 ) -> t.Optional[exp.Expression]: 5906 if not self._curr: 5907 return None 5908 5909 comments = self._curr.comments 5910 prev = self._prev 5911 token = self._curr 5912 token_type = self._curr.token_type 5913 this = self._curr.text 5914 upper = this.upper() 5915 5916 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5917 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5918 self._advance() 5919 return 
self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_function_args(alias)

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-provided comment "/* sqlglot.anonymous */" follows the function
                # call, we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
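            # Positional (non key-value) argument, e.g. an ordinary function arg:
            # _to_prop_eq is an identity hook here (see its definition above)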
6026 else: 6027 e = self._to_prop_eq(e, index) 6028 6029 transformed.append(e) 6030 6031 return transformed 6032 6033 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6034 return self._parse_statement() 6035 6036 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6037 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6038 6039 def _parse_user_defined_function( 6040 self, kind: t.Optional[TokenType] = None 6041 ) -> t.Optional[exp.Expression]: 6042 this = self._parse_table_parts(schema=True) 6043 6044 if not self._match(TokenType.L_PAREN): 6045 return this 6046 6047 expressions = self._parse_csv(self._parse_function_parameter) 6048 self._match_r_paren() 6049 return self.expression( 6050 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6051 ) 6052 6053 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6054 literal = self._parse_primary() 6055 if literal: 6056 return self.expression(exp.Introducer, this=token.text, expression=literal) 6057 6058 return self._identifier_expression(token) 6059 6060 def _parse_session_parameter(self) -> exp.SessionParameter: 6061 kind = None 6062 this = self._parse_id_var() or self._parse_primary() 6063 6064 if this and self._match(TokenType.DOT): 6065 kind = this.name 6066 this = self._parse_var() or self._parse_primary() 6067 6068 return self.expression(exp.SessionParameter, this=this, kind=kind) 6069 6070 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6071 return self._parse_id_var() 6072 6073 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6074 index = self._index 6075 6076 if self._match(TokenType.L_PAREN): 6077 expressions = t.cast( 6078 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6079 ) 6080 6081 if not self._match(TokenType.R_PAREN): 6082 self._retreat(index) 6083 else: 6084 expressions = [self._parse_lambda_arg()] 6085 6086 if self._match_set(self.LAMBDAS): 6087 return self.LAMBDAS[self._prev.token_type](self, expressions) 6088 6089 self._retreat(index) 6090 6091 this: t.Optional[exp.Expression] 6092 6093 if self._match(TokenType.DISTINCT): 6094 this = self.expression( 6095 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6096 ) 6097 else: 6098 this = self._parse_select_or_expression(alias=alias) 6099 6100 return self._parse_limit( 6101 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6102 ) 6103 6104 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6105 index = self._index 6106 if not self._match(TokenType.L_PAREN): 6107 return this 6108 6109 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6110 # expr can be of both types 6111 if self._match_set(self.SELECT_START_TOKENS): 6112 self._retreat(index) 6113 return this 6114 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6115 self._match_r_paren() 6116 return self.expression(exp.Schema, this=this, expressions=args) 6117 6118 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6119 return self._parse_column_def(self._parse_field(any_token=True)) 6120 6121 def _parse_column_def( 6122 self, this: t.Optional[exp.Expression], computed_column: bool = True 6123 ) -> t.Optional[exp.Expression]: 6124 # column defs are not really columns, they're identifiers 6125 if isinstance(this, exp.Column): 6126 this = this.this 6127 6128 if not computed_column: 6129 self._match(TokenType.ALIAS) 6130 6131 kind = self._parse_types(schema=True) 6132 6133 if self._match_text_seq("FOR", "ORDINALITY"): 6134 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6135 6136 constraints: t.List[exp.Expression] = [] 6137 6138 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6139 ("ALIAS", "MATERIALIZED") 6140 ): 6141 persisted = self._prev.text.upper() == "MATERIALIZED" 6142 constraint_kind = exp.ComputedColumnConstraint( 6143 this=self._parse_assignment(), 6144 persisted=persisted or self._match_text_seq("PERSISTED"), 6145 data_type=exp.Var(this="AUTO") 6146 if self._match_text_seq("AUTO") 6147 else self._parse_types(), 6148 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6149 ) 6150 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6151 elif ( 6152 kind 6153 and self._match(TokenType.ALIAS, advance=False) 6154 and ( 6155 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6156 or (self._next and self._next.token_type == TokenType.L_PAREN) 6157 ) 6158 ): 6159 self._advance() 6160 constraints.append( 6161 self.expression( 6162 exp.ColumnConstraint, 6163 kind=exp.ComputedColumnConstraint( 6164 this=self._parse_disjunction(), 6165 persisted=self._match_texts(("STORED", "VIRTUAL")) 6166 and self._prev.text.upper() == "STORED", 6167 ), 6168 ) 6169 ) 6170 6171 while True: 6172 constraint = self._parse_column_constraint() 6173 if not constraint: 6174 break 6175 constraints.append(constraint) 6176 6177 if not kind and not constraints: 6178 return this 6179 6180 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6181 6182 def _parse_auto_increment( 6183 self, 6184 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6185 start = None 6186 increment = None 6187 order = None 6188 6189 if self._match(TokenType.L_PAREN, advance=False): 6190 args = self._parse_wrapped_csv(self._parse_bitwise) 6191 start = seq_get(args, 0) 6192 increment = seq_get(args, 1) 6193 elif self._match_text_seq("START"): 6194 start = self._parse_bitwise() 6195 self._match_text_seq("INCREMENT") 6196 increment = self._parse_bitwise() 6197 if self._match_text_seq("ORDER"): 6198 order = True 6199 elif self._match_text_seq("NOORDER"): 6200 order = False 6201 6202 if start and increment: 6203 return exp.GeneratedAsIdentityColumnConstraint( 6204 start=start, increment=increment, this=False, order=order 6205 ) 6206 6207 return exp.AutoIncrementColumnConstraint() 6208 6209 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6210 if not self._match_text_seq("REFRESH"): 6211 self._retreat(self._index - 1) 6212 return None 6213 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6214 6215 def _parse_compress(self) -> exp.CompressColumnConstraint: 6216 if self._match(TokenType.L_PAREN, advance=False): 6217 return self.expression( 6218 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6219 ) 6220 6221 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6222 6223 def _parse_generated_as_identity( 6224 self, 6225 ) -> ( 6226 exp.GeneratedAsIdentityColumnConstraint 6227 | exp.ComputedColumnConstraint 6228 | exp.GeneratedAsRowColumnConstraint 6229 ): 6230 if self._match_text_seq("BY", "DEFAULT"): 6231 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6232 this = self.expression( 6233 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6234 ) 6235 else: 6236 self._match_text_seq("ALWAYS") 6237 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6238 6239 self._match(TokenType.ALIAS) 6240 6241 if self._match_text_seq("ROW"): 6242 start = self._match_text_seq("START") 6243 if not start: 6244 self._match(TokenType.END) 6245 hidden = self._match_text_seq("HIDDEN") 6246 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6247 6248 identity = self._match_text_seq("IDENTITY") 6249 6250 if self._match(TokenType.L_PAREN): 6251 if self._match(TokenType.START_WITH): 6252 this.set("start", self._parse_bitwise()) 6253 if self._match_text_seq("INCREMENT", "BY"): 6254 this.set("increment", self._parse_bitwise()) 6255 if self._match_text_seq("MINVALUE"): 6256 this.set("minvalue", self._parse_bitwise()) 6257 if self._match_text_seq("MAXVALUE"): 6258 this.set("maxvalue", self._parse_bitwise()) 6259 6260 if self._match_text_seq("CYCLE"): 6261 this.set("cycle", True) 6262 elif self._match_text_seq("NO", "CYCLE"): 6263 this.set("cycle", False) 6264 6265 if not identity: 6266 this.set("expression", self._parse_range()) 6267 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6268 args = self._parse_csv(self._parse_bitwise) 6269 this.set("start", seq_get(args, 0)) 6270 this.set("increment", seq_get(args, 1)) 6271 6272 self._match_r_paren() 6273 6274 return this 6275 6276 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6277 self._match_text_seq("LENGTH") 6278 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6279 6280 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6281 if self._match_text_seq("NULL"): 6282 return self.expression(exp.NotNullColumnConstraint) 6283 if self._match_text_seq("CASESPECIFIC"): 6284 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6285 if self._match_text_seq("FOR", "REPLICATION"): 6286 return self.expression(exp.NotForReplicationColumnConstraint) 6287 6288 # Unconsume the `NOT` token 6289 self._retreat(self._index - 1) 6290 return None 6291 6292 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6293 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6294 6295 procedure_option_follows = ( 6296 self._match(TokenType.WITH, advance=False) 6297 and self._next 6298 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6299 ) 6300 6301 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6302 return self.expression( 6303 exp.ColumnConstraint, 6304 this=this, 6305 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6306 ) 6307 6308 return this 6309 6310 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6311 if not 
self._match(TokenType.CONSTRAINT): 6312 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6313 6314 return self.expression( 6315 exp.Constraint, 6316 this=self._parse_id_var(), 6317 expressions=self._parse_unnamed_constraints(), 6318 ) 6319 6320 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6321 constraints = [] 6322 while True: 6323 constraint = self._parse_unnamed_constraint() or self._parse_function() 6324 if not constraint: 6325 break 6326 constraints.append(constraint) 6327 6328 return constraints 6329 6330 def _parse_unnamed_constraint( 6331 self, constraints: t.Optional[t.Collection[str]] = None 6332 ) -> t.Optional[exp.Expression]: 6333 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6334 constraints or self.CONSTRAINT_PARSERS 6335 ): 6336 return None 6337 6338 constraint = self._prev.text.upper() 6339 if constraint not in self.CONSTRAINT_PARSERS: 6340 self.raise_error(f"No parser found for schema constraint {constraint}.") 6341 6342 return self.CONSTRAINT_PARSERS[constraint](self) 6343 6344 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6345 return self._parse_id_var(any_token=False) 6346 6347 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6348 self._match_texts(("KEY", "INDEX")) 6349 return self.expression( 6350 exp.UniqueColumnConstraint, 6351 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6352 this=self._parse_schema(self._parse_unique_key()), 6353 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6354 on_conflict=self._parse_on_conflict(), 6355 options=self._parse_key_constraint_options(), 6356 ) 6357 6358 def _parse_key_constraint_options(self) -> t.List[str]: 6359 options = [] 6360 while True: 6361 if not self._curr: 6362 break 6363 6364 if self._match(TokenType.ON): 6365 action = None 6366 on = self._advance_any() and self._prev.text 6367 6368 if self._match_text_seq("NO", "ACTION"): 6369 action = "NO ACTION" 6370 elif self._match_text_seq("CASCADE"): 6371 action = "CASCADE" 6372 elif self._match_text_seq("RESTRICT"): 6373 action = "RESTRICT" 6374 elif self._match_pair(TokenType.SET, TokenType.NULL): 6375 action = "SET NULL" 6376 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6377 action = "SET DEFAULT" 6378 else: 6379 self.raise_error("Invalid key constraint") 6380 6381 options.append(f"ON {on} {action}") 6382 else: 6383 var = self._parse_var_from_options( 6384 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6385 ) 6386 if not var: 6387 break 6388 options.append(var.name) 6389 6390 return options 6391 6392 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6393 if match and not self._match(TokenType.REFERENCES): 6394 return None 6395 6396 expressions = None 6397 this = self._parse_table(schema=True) 6398 options = self._parse_key_constraint_options() 6399 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6400 6401 def _parse_foreign_key(self) -> exp.ForeignKey: 6402 expressions = ( 6403 self._parse_wrapped_id_vars() 6404 if not self._match(TokenType.REFERENCES, advance=False) 6405 else None 6406 ) 6407 reference = self._parse_references() 6408 on_options = {} 6409 6410 while self._match(TokenType.ON): 6411 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6412 self.raise_error("Expected DELETE or UPDATE") 6413 6414 kind = self._prev.text.lower() 6415 6416 if self._match_text_seq("NO", "ACTION"): 6417 action = "NO ACTION" 6418 elif 
self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime literal in ODBC format. The literal is parsed into the
        corresponding expression type; for example, `{d'yyyy-mm-dd'}` is parsed as a
        `Date`, exactly as `DATE('yyyy-mm-dd')` would be.
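
        The same scheme covers the other ODBC forms from the reference below,
        e.g. `{t 'hh:mm:ss'}` and `{ts 'yyyy-mm-dd hh:mm:ss'}` (illustrative).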
6484 6485 Reference: 6486 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6487 """ 6488 self._match(TokenType.VAR) 6489 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6490 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6491 if not self._match(TokenType.R_BRACE): 6492 self.raise_error("Expected }") 6493 return expression 6494 6495 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6496 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6497 return this 6498 6499 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6500 map_token = seq_get(self._tokens, self._index - 2) 6501 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6502 else: 6503 parse_map = False 6504 6505 bracket_kind = self._prev.token_type 6506 if ( 6507 bracket_kind == TokenType.L_BRACE 6508 and self._curr 6509 and self._curr.token_type == TokenType.VAR 6510 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6511 ): 6512 return self._parse_odbc_datetime_literal() 6513 6514 expressions = self._parse_csv( 6515 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6516 ) 6517 6518 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6519 self.raise_error("Expected ]") 6520 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6521 self.raise_error("Expected }") 6522 6523 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6524 if bracket_kind == TokenType.L_BRACE: 6525 this = self.expression( 6526 exp.Struct, 6527 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6528 ) 6529 elif not this: 6530 this = build_array_constructor( 6531 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6532 ) 6533 else: 6534 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6535 if constructor_type: 6536 return build_array_constructor( 6537 constructor_type, 6538 args=expressions, 6539 bracket_kind=bracket_kind, 6540 dialect=self.dialect, 6541 ) 6542 6543 expressions = apply_index_offset( 6544 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6545 ) 6546 this = self.expression( 6547 exp.Bracket, 6548 this=this, 6549 expressions=expressions, 6550 comments=this.pop_comments(), 6551 ) 6552 6553 self._add_comments(this) 6554 return self._parse_bracket(this) 6555 6556 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6557 if self._match(TokenType.COLON): 6558 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6559 return this 6560 6561 def _parse_case(self) -> t.Optional[exp.Expression]: 6562 if self._match(TokenType.DOT, advance=False): 6563 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6564 self._retreat(self._index - 1) 6565 return None 6566 6567 ifs = [] 6568 default = None 6569 6570 comments = self._prev_comments 6571 expression = self._parse_assignment() 6572 6573 while self._match(TokenType.WHEN): 6574 this = self._parse_assignment() 6575 self._match(TokenType.THEN) 6576 then = self._parse_assignment() 6577 ifs.append(self.expression(exp.If, this=this, true=then)) 6578 6579 if self._match(TokenType.ELSE): 6580 default = self._parse_assignment() 6581 6582 if not self._match(TokenType.END): 6583 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6584 default 
= exp.column("interval") 6585 else: 6586 self.raise_error("Expected END after CASE", self._prev) 6587 6588 return self.expression( 6589 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6590 ) 6591 6592 def _parse_if(self) -> t.Optional[exp.Expression]: 6593 if self._match(TokenType.L_PAREN): 6594 args = self._parse_csv( 6595 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6596 ) 6597 this = self.validate_expression(exp.If.from_arg_list(args), args) 6598 self._match_r_paren() 6599 else: 6600 index = self._index - 1 6601 6602 if self.NO_PAREN_IF_COMMANDS and index == 0: 6603 return self._parse_as_command(self._prev) 6604 6605 condition = self._parse_assignment() 6606 6607 if not condition: 6608 self._retreat(index) 6609 return None 6610 6611 self._match(TokenType.THEN) 6612 true = self._parse_assignment() 6613 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6614 self._match(TokenType.END) 6615 this = self.expression(exp.If, this=condition, true=true, false=false) 6616 6617 return this 6618 6619 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6620 if not self._match_text_seq("VALUE", "FOR"): 6621 self._retreat(self._index - 1) 6622 return None 6623 6624 return self.expression( 6625 exp.NextValueFor, 6626 this=self._parse_column(), 6627 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6628 ) 6629 6630 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6631 this = self._parse_function() or self._parse_var_or_string(upper=True) 6632 6633 if self._match(TokenType.FROM): 6634 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6635 6636 if not self._match(TokenType.COMMA): 6637 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6638 6639 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6640 6641 def _parse_gap_fill(self) -> exp.GapFill: 6642 self._match(TokenType.TABLE) 6643 this = self._parse_table() 6644 6645 self._match(TokenType.COMMA) 6646 args = [this, *self._parse_csv(self._parse_lambda)] 6647 6648 gap_fill = exp.GapFill.from_arg_list(args) 6649 return self.validate_expression(gap_fill, args) 6650 6651 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6652 this = self._parse_assignment() 6653 6654 if not self._match(TokenType.ALIAS): 6655 if self._match(TokenType.COMMA): 6656 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6657 6658 self.raise_error("Expected AS after CAST") 6659 6660 fmt = None 6661 to = self._parse_types() 6662 6663 default = self._match(TokenType.DEFAULT) 6664 if default: 6665 default = self._parse_bitwise() 6666 self._match_text_seq("ON", "CONVERSION", "ERROR") 6667 6668 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6669 fmt_string = self._parse_string() 6670 fmt = self._parse_at_time_zone(fmt_string) 6671 6672 if not to: 6673 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6674 if to.this in exp.DataType.TEMPORAL_TYPES: 6675 this = self.expression( 6676 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6677 this=this, 6678 format=exp.Literal.string( 6679 format_time( 6680 fmt_string.this if fmt_string else "", 6681 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6682 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6683 ) 6684 ), 6685 safe=safe, 6686 ) 6687 6688 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6689 this.set("zone", 
fmt.args["zone"]) 6690 return this 6691 elif not to: 6692 self.raise_error("Expected TYPE after CAST") 6693 elif isinstance(to, exp.Identifier): 6694 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6695 elif to.this == exp.DataType.Type.CHAR: 6696 if self._match(TokenType.CHARACTER_SET): 6697 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6698 6699 return self.build_cast( 6700 strict=strict, 6701 this=this, 6702 to=to, 6703 format=fmt, 6704 safe=safe, 6705 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6706 default=default, 6707 ) 6708 6709 def _parse_string_agg(self) -> exp.GroupConcat: 6710 if self._match(TokenType.DISTINCT): 6711 args: t.List[t.Optional[exp.Expression]] = [ 6712 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6713 ] 6714 if self._match(TokenType.COMMA): 6715 args.extend(self._parse_csv(self._parse_assignment)) 6716 else: 6717 args = self._parse_csv(self._parse_assignment) # type: ignore 6718 6719 if self._match_text_seq("ON", "OVERFLOW"): 6720 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6721 if self._match_text_seq("ERROR"): 6722 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6723 else: 6724 self._match_text_seq("TRUNCATE") 6725 on_overflow = self.expression( 6726 exp.OverflowTruncateBehavior, 6727 this=self._parse_string(), 6728 with_count=( 6729 self._match_text_seq("WITH", "COUNT") 6730 or not self._match_text_seq("WITHOUT", "COUNT") 6731 ), 6732 ) 6733 else: 6734 on_overflow = None 6735 6736 index = self._index 6737 if not self._match(TokenType.R_PAREN) and args: 6738 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6739 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6740 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6741 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6742 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6743 6744 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6745 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6746 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6747 if not self._match_text_seq("WITHIN", "GROUP"): 6748 self._retreat(index) 6749 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6750 6751 # The corresponding match_r_paren will be called in parse_function (caller) 6752 self._match_l_paren() 6753 6754 return self.expression( 6755 exp.GroupConcat, 6756 this=self._parse_order(this=seq_get(args, 0)), 6757 separator=seq_get(args, 1), 6758 on_overflow=on_overflow, 6759 ) 6760 6761 def _parse_convert( 6762 self, strict: bool, safe: t.Optional[bool] = None 6763 ) -> t.Optional[exp.Expression]: 6764 this = self._parse_bitwise() 6765 6766 if self._match(TokenType.USING): 6767 to: t.Optional[exp.Expression] = self.expression( 6768 exp.CharacterSet, this=self._parse_var() 6769 ) 6770 elif self._match(TokenType.COMMA): 6771 to = self._parse_types() 6772 else: 6773 to = None 6774 6775 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6776 6777 def _parse_xml_table(self) -> exp.XMLTable: 6778 namespaces = None 6779 passing = None 6780 columns = None 6781 6782 if self._match_text_seq("XMLNAMESPACES", "("): 6783 namespaces = self._parse_xml_namespace() 6784 self._match_text_seq(")", ",") 6785 6786 this = self._parse_string() 6787 6788 if self._match_text_seq("PASSING"): 6789 # The BY VALUE keywords are optional and are provided for semantic clarity 6790 self._match_text_seq("BY", "VALUE") 6791 passing = self._parse_csv(self._parse_column) 6792 6793 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6794 6795 if self._match_text_seq("COLUMNS"): 6796 columns = self._parse_csv(self._parse_field_def) 6797 6798 return self.expression( 6799 exp.XMLTable, 6800 this=this, 6801 namespaces=namespaces, 6802 passing=passing, 6803 columns=columns, 6804 by_ref=by_ref, 6805 ) 6806 6807 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6808 namespaces = [] 6809 6810 while True: 6811 if self._match(TokenType.DEFAULT): 6812 uri = self._parse_string() 6813 else: 6814 uri = self._parse_alias(self._parse_string()) 6815 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6816 if not self._match(TokenType.COMMA): 6817 break 6818 6819 return namespaces 6820 6821 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6822 args = self._parse_csv(self._parse_assignment) 6823 6824 if len(args) < 3: 6825 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6826 6827 return self.expression(exp.DecodeCase, expressions=args) 6828 6829 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6830 self._match_text_seq("KEY") 6831 key = self._parse_column() 6832 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6833 self._match_text_seq("VALUE") 6834 value = self._parse_bitwise() 6835 6836 if not key and not value: 6837 return None 6838 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6839 6840 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6841 if not this or not self._match_text_seq("FORMAT", "JSON"): 6842 return this 6843 6844 return self.expression(exp.FormatJson, this=this) 6845 6846 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6847 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6848 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6849 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6850 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6851 else: 6852 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6853 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6854 6855 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6856 6857 if not empty and not error and not null: 6858 return None 6859 6860 return self.expression( 6861 exp.OnCondition, 6862 empty=empty, 6863 error=error, 6864 null=null, 6865 ) 6866 6867 def _parse_on_handling( 6868 self, on: str, *values: str 6869 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6870 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6871 for value in values: 6872 if self._match_text_seq(value, "ON", on): 6873 return f"{value} ON {on}" 6874 6875 index = self._index 6876 if self._match(TokenType.DEFAULT): 6877 default_value = self._parse_bitwise() 6878 if self._match_text_seq("ON", on): 6879 return default_value 6880 6881 self._retreat(index) 6882 6883 return None 6884 6885 @t.overload 6886 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6887 6888 @t.overload 6889 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6890 6891 def _parse_json_object(self, agg=False): 6892 star = self._parse_star() 6893 expressions = ( 6894 [star] 6895 if star 6896 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6897 ) 6898 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6899 6900 unique_keys = None 6901 if self._match_text_seq("WITH", "UNIQUE"): 6902 unique_keys = True 6903 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6904 unique_keys = False 6905 6906 self._match_text_seq("KEYS") 6907 6908 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6909 self._parse_type() 6910 ) 6911 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6912 6913 return self.expression( 6914 exp.JSONObjectAgg if agg else exp.JSONObject, 6915 expressions=expressions, 6916 null_handling=null_handling, 6917 unique_keys=unique_keys, 6918 return_type=return_type, 6919 encoding=encoding, 6920 ) 6921 6922 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6923 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6924 if not self._match_text_seq("NESTED"): 6925 this = self._parse_id_var() 6926 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 6927 kind = self._parse_types(allow_identifiers=False) 6928 nested = None 6929 else: 6930 this = None 6931 ordinality = None 6932 kind = None 6933 nested = True 6934 6935 path = self._match_text_seq("PATH") and self._parse_string() 6936 nested_schema = nested and self._parse_json_schema() 6937 6938 return self.expression( 6939 exp.JSONColumnDef, 6940 this=this, 6941 kind=kind, 6942 path=path, 6943 nested_schema=nested_schema, 6944 ordinality=ordinality, 6945 ) 6946 6947 def _parse_json_schema(self) -> exp.JSONSchema: 6948 self._match_text_seq("COLUMNS") 6949 return self.expression( 6950 exp.JSONSchema, 6951 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6952 ) 6953 6954 def _parse_json_table(self) -> exp.JSONTable: 6955 this = self._parse_format_json(self._parse_bitwise()) 6956 path = self._match(TokenType.COMMA) and self._parse_string() 6957 error_handling =
self._parse_on_handling("ERROR", "ERROR", "NULL") 6958 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6959 schema = self._parse_json_schema() 6960 6961 return exp.JSONTable( 6962 this=this, 6963 schema=schema, 6964 path=path, 6965 error_handling=error_handling, 6966 empty_handling=empty_handling, 6967 ) 6968 6969 def _parse_match_against(self) -> exp.MatchAgainst: 6970 if self._match_text_seq("TABLE"): 6971 # parse SingleStore MATCH(TABLE ...) syntax 6972 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6973 expressions = [] 6974 table = self._parse_table() 6975 if table: 6976 expressions = [table] 6977 else: 6978 expressions = self._parse_csv(self._parse_column) 6979 6980 self._match_text_seq(")", "AGAINST", "(") 6981 6982 this = self._parse_string() 6983 6984 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6985 modifier = "IN NATURAL LANGUAGE MODE" 6986 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6987 modifier = f"{modifier} WITH QUERY EXPANSION" 6988 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6989 modifier = "IN BOOLEAN MODE" 6990 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6991 modifier = "WITH QUERY EXPANSION" 6992 else: 6993 modifier = None 6994 6995 return self.expression( 6996 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6997 ) 6998 6999 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 7000 def _parse_open_json(self) -> exp.OpenJSON: 7001 this = self._parse_bitwise() 7002 path = self._match(TokenType.COMMA) and self._parse_string() 7003 7004 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7005 this = self._parse_field(any_token=True) 7006 kind = self._parse_types() 7007 path = self._parse_string() 7008 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7009 7010 return self.expression( 7011 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 7012 ) 7013 7014 expressions = None 7015 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7016 self._match_l_paren() 7017 expressions = self._parse_csv(_parse_open_json_column_def) 7018 7019 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7020 7021 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7022 args = self._parse_csv(self._parse_bitwise) 7023 7024 if self._match(TokenType.IN): 7025 return self.expression( 7026 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7027 ) 7028 7029 if haystack_first: 7030 haystack = seq_get(args, 0) 7031 needle = seq_get(args, 1) 7032 else: 7033 haystack = seq_get(args, 1) 7034 needle = seq_get(args, 0) 7035 7036 return self.expression( 7037 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7038 ) 7039 7040 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7041 args = self._parse_csv(self._parse_table) 7042 return exp.JoinHint(this=func_name.upper(), expressions=args) 7043 7044 def _parse_substring(self) -> exp.Substring: 7045 # Postgres supports the form: substring(string [from int] [for int]) 7046 # (despite being undocumented, the reverse order also works) 7047 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7048 7049 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7050 7051 start, length = None, None 7052 7053 while self._curr: 7054 if self._match(TokenType.FROM): 7055 start = 
self._parse_bitwise() 7056 elif self._match(TokenType.FOR): 7057 if not start: 7058 start = exp.Literal.number(1) 7059 length = self._parse_bitwise() 7060 else: 7061 break 7062 7063 if start: 7064 args.append(start) 7065 if length: 7066 args.append(length) 7067 7068 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7069 7070 def _parse_trim(self) -> exp.Trim: 7071 # https://www.w3resource.com/sql/character-functions/trim.php 7072 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7073 7074 position = None 7075 collation = None 7076 expression = None 7077 7078 if self._match_texts(self.TRIM_TYPES): 7079 position = self._prev.text.upper() 7080 7081 this = self._parse_bitwise() 7082 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7083 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7084 expression = self._parse_bitwise() 7085 7086 if invert_order: 7087 this, expression = expression, this 7088 7089 if self._match(TokenType.COLLATE): 7090 collation = self._parse_bitwise() 7091 7092 return self.expression( 7093 exp.Trim, this=this, position=position, expression=expression, collation=collation 7094 ) 7095 7096 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7097 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7098 7099 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7100 return self._parse_window(self._parse_id_var(), alias=True) 7101 7102 def _parse_respect_or_ignore_nulls( 7103 self, this: t.Optional[exp.Expression] 7104 ) -> t.Optional[exp.Expression]: 7105 if self._match_text_seq("IGNORE", "NULLS"): 7106 return self.expression(exp.IgnoreNulls, this=this) 7107 if self._match_text_seq("RESPECT", "NULLS"): 7108 return self.expression(exp.RespectNulls, this=this) 7109 return this 7110 7111 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7112 if self._match(TokenType.HAVING): 7113 self._match_texts(("MAX", "MIN")) 7114 max = self._prev.text.upper() != "MIN" 7115 return self.expression( 7116 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7117 ) 7118 7119 return this 7120 7121 def _parse_window( 7122 self, this: t.Optional[exp.Expression], alias: bool = False 7123 ) -> t.Optional[exp.Expression]: 7124 func = this 7125 comments = func.comments if isinstance(func, exp.Expression) else None 7126 7127 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7128 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7129 if self._match_text_seq("WITHIN", "GROUP"): 7130 order = self._parse_wrapped(self._parse_order) 7131 this = self.expression(exp.WithinGroup, this=this, expression=order) 7132 7133 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7134 self._match(TokenType.WHERE) 7135 this = self.expression( 7136 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7137 ) 7138 self._match_r_paren() 7139 7140 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7141 # Some dialects choose to implement and some do not. 7142 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7143 7144 # There is some code above in _parse_lambda that handles 7145 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7146 7147 # The below changes handle 7148 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
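# For instance (illustrative, assumed inputs), both spellings end up as the same tree,
# an exp.IgnoreNulls node wrapping the aggregate before the exp.Window is built:
#   SELECT FIRST_VALUE(t.col IGNORE NULLS) OVER (ORDER BY t.id)
#   SELECT FIRST_VALUE(t.col) IGNORE NULLS OVER (ORDER BY t.id)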
7149 7150 # Oracle allows both formats 7151 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7152 # and Snowflake chose to do the same for familiarity 7153 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7154 if isinstance(this, exp.AggFunc): 7155 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7156 7157 if ignore_respect and ignore_respect is not this: 7158 ignore_respect.replace(ignore_respect.this) 7159 this = self.expression(ignore_respect.__class__, this=this) 7160 7161 this = self._parse_respect_or_ignore_nulls(this) 7162 7163 # bigquery select from window x AS (partition by ...) 7164 if alias: 7165 over = None 7166 self._match(TokenType.ALIAS) 7167 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7168 return this 7169 else: 7170 over = self._prev.text.upper() 7171 7172 if comments and isinstance(func, exp.Expression): 7173 func.pop_comments() 7174 7175 if not self._match(TokenType.L_PAREN): 7176 return self.expression( 7177 exp.Window, 7178 comments=comments, 7179 this=this, 7180 alias=self._parse_id_var(False), 7181 over=over, 7182 ) 7183 7184 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7185 7186 first = self._match(TokenType.FIRST) 7187 if self._match_text_seq("LAST"): 7188 first = False 7189 7190 partition, order = self._parse_partition_and_order() 7191 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7192 7193 if kind: 7194 self._match(TokenType.BETWEEN) 7195 start = self._parse_window_spec() 7196 7197 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7198 exclude = ( 7199 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7200 if self._match_text_seq("EXCLUDE") 7201 else None 7202 ) 7203 7204 spec = self.expression( 7205 exp.WindowSpec, 7206 kind=kind, 7207 start=start["value"], 7208 start_side=start["side"], 7209 end=end.get("value"), 7210 end_side=end.get("side"), 7211 exclude=exclude, 7212 ) 7213 else: 7214 spec = None 7215 7216 self._match_r_paren() 7217 7218 window = self.expression( 7219 exp.Window, 7220 comments=comments, 7221 this=this, 7222 partition_by=partition, 7223 order=order, 7224 spec=spec, 7225 alias=window_alias, 7226 over=over, 7227 first=first, 7228 ) 7229 7230 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
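# e.g. (illustrative, assumed input):
#   MAX(sal) KEEP (DENSE_RANK FIRST ORDER BY hiredate) OVER (PARTITION BY dept)
# The KEEP (...) part is parsed as the first window, then we recurse below so the
# trailing OVER (...) wraps the window that was just built.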
7231 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7232 return self._parse_window(window, alias=alias) 7233 7234 return window 7235 7236 def _parse_partition_and_order( 7237 self, 7238 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7239 return self._parse_partition_by(), self._parse_order() 7240 7241 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7242 self._match(TokenType.BETWEEN) 7243 7244 return { 7245 "value": ( 7246 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7247 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7248 or self._parse_type() 7249 ), 7250 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7251 } 7252 7253 def _parse_alias( 7254 self, this: t.Optional[exp.Expression], explicit: bool = False 7255 ) -> t.Optional[exp.Expression]: 7256 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7257 # so this section tries to parse the clause version and if it fails, it treats the token 7258 # as an identifier (alias) 7259 if self._can_parse_limit_or_offset(): 7260 return this 7261 7262 any_token = self._match(TokenType.ALIAS) 7263 comments = self._prev_comments or [] 7264 7265 if explicit and not any_token: 7266 return this 7267 7268 if self._match(TokenType.L_PAREN): 7269 aliases = self.expression( 7270 exp.Aliases, 7271 comments=comments, 7272 this=this, 7273 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7274 ) 7275 self._match_r_paren(aliases) 7276 return aliases 7277 7278 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7279 self.STRING_ALIASES and self._parse_string_as_identifier() 7280 ) 7281 7282 if alias: 7283 comments.extend(alias.pop_comments()) 7284 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7285 column = this.this 7286 7287 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7288 if not this.comments and column and column.comments: 7289 this.comments = column.pop_comments() 7290 7291 return this 7292 7293 def _parse_id_var( 7294 self, 7295 any_token: bool = True, 7296 tokens: t.Optional[t.Collection[TokenType]] = None, 7297 ) -> t.Optional[exp.Expression]: 7298 expression = self._parse_identifier() 7299 if not expression and ( 7300 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7301 ): 7302 quoted = self._prev.token_type == TokenType.STRING 7303 expression = self._identifier_expression(quoted=quoted) 7304 7305 return expression 7306 7307 def _parse_string(self) -> t.Optional[exp.Expression]: 7308 if self._match_set(self.STRING_PARSERS): 7309 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7310 return self._parse_placeholder() 7311 7312 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7313 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7314 if output: 7315 output.update_positions(self._prev) 7316 return output 7317 7318 def _parse_number(self) -> t.Optional[exp.Expression]: 7319 if self._match_set(self.NUMERIC_PARSERS): 7320 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7321 return self._parse_placeholder() 7322 7323 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7324 if self._match(TokenType.IDENTIFIER): 7325 return self._identifier_expression(quoted=True) 7326 return self._parse_placeholder() 7327 7328 def _parse_var( 7329 self, 7330 any_token: bool = False, 7331 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7332 upper: bool = False, 7333 ) -> t.Optional[exp.Expression]: 7334 if ( 7335 (any_token and self._advance_any()) 7336 or self._match(TokenType.VAR) 7337 or (self._match_set(tokens) if tokens else False) 7338 ): 7339 return self.expression( 7340 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7341 ) 7342 return self._parse_placeholder() 7343 7344 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7345 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7346 self._advance() 7347 return self._prev 7348 return None 7349 7350 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7351 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7352 7353 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7354 return self._parse_primary() or self._parse_var(any_token=True) 7355 7356 def _parse_null(self) -> t.Optional[exp.Expression]: 7357 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7358 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7359 return self._parse_placeholder() 7360 7361 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7362 if self._match(TokenType.TRUE): 7363 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7364 if self._match(TokenType.FALSE): 7365 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7366 return self._parse_placeholder() 7367 7368 def _parse_star(self) -> t.Optional[exp.Expression]: 7369 if self._match(TokenType.STAR): 7370 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7371 return self._parse_placeholder() 7372 7373 def _parse_parameter(self) -> exp.Parameter: 7374 this = self._parse_identifier() or self._parse_primary_or_var() 7375 return self.expression(exp.Parameter, this=this) 7376 7377 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7378 if self._match_set(self.PLACEHOLDER_PARSERS): 7379 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7380 if placeholder: 7381 return placeholder 7382 self._advance(-1) 7383 return None 7384 7385 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7386 if not self._match_texts(keywords): 7387 return None 7388 if self._match(TokenType.L_PAREN, advance=False): 7389 return self._parse_wrapped_csv(self._parse_expression) 7390 7391 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7392 return [expression] if expression else None 7393 7394 def _parse_csv( 7395 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7396 ) -> t.List[exp.Expression]: 7397 parse_result = parse_method() 7398 items = [parse_result] if parse_result is not None else [] 7399 7400 while self._match(sep): 7401 self._add_comments(parse_result) 7402 parse_result = parse_method() 7403 if parse_result is not None: 7404 items.append(parse_result) 7405 7406 return items 7407 7408 def _parse_tokens( 7409 self, parse_method: t.Callable, expressions: t.Dict 7410 ) -> t.Optional[exp.Expression]: 7411 this = parse_method() 7412 7413 while self._match_set(expressions): 7414 this = self.expression( 7415 expressions[self._prev.token_type], 7416 this=this, 7417 comments=self._prev_comments, 7418 expression=parse_method(), 7419 ) 7420 7421 return this 7422 7423 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7424 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7425 7426 def 
_parse_wrapped_csv( 7427 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7428 ) -> t.List[exp.Expression]: 7429 return self._parse_wrapped( 7430 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7431 ) 7432 7433 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7434 wrapped = self._match(TokenType.L_PAREN) 7435 if not wrapped and not optional: 7436 self.raise_error("Expecting (") 7437 parse_result = parse_method() 7438 if wrapped: 7439 self._match_r_paren() 7440 return parse_result 7441 7442 def _parse_expressions(self) -> t.List[exp.Expression]: 7443 return self._parse_csv(self._parse_expression) 7444 7445 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7446 return ( 7447 self._parse_set_operations( 7448 self._parse_alias(self._parse_assignment(), explicit=True) 7449 if alias 7450 else self._parse_assignment() 7451 ) 7452 or self._parse_select() 7453 ) 7454 7455 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7456 return self._parse_query_modifiers( 7457 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7458 ) 7459 7460 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7461 this = None 7462 if self._match_texts(self.TRANSACTION_KIND): 7463 this = self._prev.text 7464 7465 self._match_texts(("TRANSACTION", "WORK")) 7466 7467 modes = [] 7468 while True: 7469 mode = [] 7470 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7471 mode.append(self._prev.text) 7472 7473 if mode: 7474 modes.append(" ".join(mode)) 7475 if not self._match(TokenType.COMMA): 7476 break 7477 7478 return self.expression(exp.Transaction, this=this, modes=modes) 7479 7480 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7481 chain = None 7482 savepoint = None 7483 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7484 7485 self._match_texts(("TRANSACTION", "WORK")) 7486 7487 if self._match_text_seq("TO"): 7488 self._match_text_seq("SAVEPOINT") 7489 savepoint = self._parse_id_var() 7490 7491 if self._match(TokenType.AND): 7492 chain = not self._match_text_seq("NO") 7493 self._match_text_seq("CHAIN") 7494 7495 if is_rollback: 7496 return self.expression(exp.Rollback, savepoint=savepoint) 7497 7498 return self.expression(exp.Commit, chain=chain) 7499 7500 def _parse_refresh(self) -> exp.Refresh: 7501 self._match(TokenType.TABLE) 7502 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7503 7504 def _parse_column_def_with_exists(self): 7505 start = self._index 7506 self._match(TokenType.COLUMN) 7507 7508 exists_column = self._parse_exists(not_=True) 7509 expression = self._parse_field_def() 7510 7511 if not isinstance(expression, exp.ColumnDef): 7512 self._retreat(start) 7513 return None 7514 7515 expression.set("exists", exists_column) 7516 7517 return expression 7518 7519 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7520 if not self._prev.text.upper() == "ADD": 7521 return None 7522 7523 expression = self._parse_column_def_with_exists() 7524 if not expression: 7525 return None 7526 7527 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7528 if self._match_texts(("FIRST", "AFTER")): 7529 position = self._prev.text 7530 column_position = self.expression( 7531 exp.ColumnPosition, this=self._parse_column(), position=position 7532 ) 7533 expression.set("position", column_position) 7534 7535 return 
expression 7536 7537 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7538 drop = self._match(TokenType.DROP) and self._parse_drop() 7539 if drop and not isinstance(drop, exp.Command): 7540 drop.set("kind", drop.args.get("kind", "COLUMN")) 7541 return drop 7542 7543 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7544 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7545 return self.expression( 7546 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7547 ) 7548 7549 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7550 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7551 self._match_text_seq("ADD") 7552 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7553 return self.expression( 7554 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7555 ) 7556 7557 column_def = self._parse_add_column() 7558 if isinstance(column_def, exp.ColumnDef): 7559 return column_def 7560 7561 exists = self._parse_exists(not_=True) 7562 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7563 return self.expression( 7564 exp.AddPartition, 7565 exists=exists, 7566 this=self._parse_field(any_token=True), 7567 location=self._match_text_seq("LOCATION", advance=False) 7568 and self._parse_property(), 7569 ) 7570 7571 return None 7572 7573 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7574 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7575 or self._match_text_seq("COLUMNS") 7576 ): 7577 schema = self._parse_schema() 7578 7579 return ( 7580 ensure_list(schema) 7581 if schema 7582 else self._parse_csv(self._parse_column_def_with_exists) 7583 ) 7584 7585 return self._parse_csv(_parse_add_alteration) 7586 7587 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7588 if self._match_texts(self.ALTER_ALTER_PARSERS): 7589 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7590 7591 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7592 # keyword after ALTER we default to parsing this statement 7593 self._match(TokenType.COLUMN) 7594 column = self._parse_field(any_token=True) 7595 7596 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7597 return self.expression(exp.AlterColumn, this=column, drop=True) 7598 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7599 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7600 if self._match(TokenType.COMMENT): 7601 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7602 if self._match_text_seq("DROP", "NOT", "NULL"): 7603 return self.expression( 7604 exp.AlterColumn, 7605 this=column, 7606 drop=True, 7607 allow_null=True, 7608 ) 7609 if self._match_text_seq("SET", "NOT", "NULL"): 7610 return self.expression( 7611 exp.AlterColumn, 7612 this=column, 7613 allow_null=False, 7614 ) 7615 7616 if self._match_text_seq("SET", "VISIBLE"): 7617 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7618 if self._match_text_seq("SET", "INVISIBLE"): 7619 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7620 7621 self._match_text_seq("SET", "DATA") 7622 self._match_text_seq("TYPE") 7623 return self.expression( 7624 exp.AlterColumn, 7625 this=column, 7626 dtype=self._parse_types(), 7627 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7628 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7629 ) 7630 7631 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7632 if self._match_texts(("ALL", "EVEN", "AUTO")): 7633 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7634 7635 self._match_text_seq("KEY", "DISTKEY") 7636 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7637 7638 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7639 if compound: 7640 self._match_text_seq("SORTKEY") 7641 7642 if self._match(TokenType.L_PAREN, advance=False): 7643 return self.expression( 7644 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7645 ) 7646 7647 self._match_texts(("AUTO", "NONE")) 7648 return self.expression( 7649 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7650 ) 7651 7652 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7653 index = self._index - 1 7654 7655 partition_exists = self._parse_exists() 7656 if self._match(TokenType.PARTITION, advance=False): 7657 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7658 7659 self._retreat(index) 7660 return self._parse_csv(self._parse_drop_column) 7661 7662 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7663 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7664 exists = self._parse_exists() 7665 old_column = self._parse_column() 7666 to = self._match_text_seq("TO") 7667 new_column = self._parse_column() 7668 7669 if old_column is None or to is None or new_column is None: 7670 return None 7671 7672 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7673 7674 self._match_text_seq("TO") 7675 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7676 7677 def _parse_alter_table_set(self) -> exp.AlterSet: 7678 alter_set = self.expression(exp.AlterSet) 7679 7680 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7681 "TABLE", "PROPERTIES" 7682 ): 7683 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7684 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7685 alter_set.set("expressions", [self._parse_assignment()]) 7686 elif self._match_texts(("LOGGED", "UNLOGGED")): 7687 alter_set.set("option", exp.var(self._prev.text.upper())) 7688 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7689 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7690 elif self._match_text_seq("LOCATION"): 7691 alter_set.set("location", self._parse_field()) 7692 elif self._match_text_seq("ACCESS", "METHOD"): 7693 alter_set.set("access_method", self._parse_field()) 7694 elif self._match_text_seq("TABLESPACE"): 7695 alter_set.set("tablespace", self._parse_field()) 7696 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7697 alter_set.set("file_format", [self._parse_field()]) 7698 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7699 alter_set.set("file_format", self._parse_wrapped_options()) 7700 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7701 alter_set.set("copy_options", self._parse_wrapped_options()) 7702 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7703 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7704 else: 7705 if self._match_text_seq("SERDE"): 7706 alter_set.set("serde", self._parse_field()) 7707 7708 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7709 alter_set.set("expressions", [properties]) 7710 7711 return alter_set 7712 7713 def _parse_alter_session(self) -> exp.AlterSession: 7714 """Parse ALTER SESSION SET/UNSET statements.""" 7715 if self._match(TokenType.SET): 7716 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7717 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7718 7719 self._match_text_seq("UNSET") 7720 expressions = self._parse_csv( 7721 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7722 ) 7723 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7724 7725 def _parse_alter(self) -> exp.Alter | exp.Command: 7726 start = self._prev 7727 7728 alter_token = self._match_set(self.ALTERABLES) and self._prev 7729 if not alter_token: 7730 return self._parse_as_command(start) 7731 7732 exists = self._parse_exists() 7733 only = self._match_text_seq("ONLY") 7734 7735 if alter_token.token_type == TokenType.SESSION: 7736 this = None 7737 check = None 7738 cluster = None 7739 else: 7740 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 7741 check = self._match_text_seq("WITH", "CHECK") 7742 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7743 7744 if self._next: 7745 self._advance() 7746 7747 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7748 if parser: 7749 actions = ensure_list(parser(self)) 7750 not_valid = self._match_text_seq("NOT", "VALID") 7751 options = self._parse_csv(self._parse_property) 7752 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 7753 7754 if not self._curr and actions: 7755 return self.expression( 7756 exp.Alter, 7757 this=this, 7758 kind=alter_token.text.upper(), 7759 exists=exists, 7760 actions=actions, 7761 only=only, 7762 options=options, 7763 cluster=cluster, 7764 not_valid=not_valid, 7765 check=check, 7766 cascade=cascade, 7767 ) 7768 7769 return self._parse_as_command(start) 7770 7771 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7772 start = self._prev 7773 # https://duckdb.org/docs/sql/statements/analyze 7774 if not self._curr: 7775 return self.expression(exp.Analyze) 7776 7777 options = [] 7778 while self._match_texts(self.ANALYZE_STYLES): 7779 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7780 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7781 else: 7782 options.append(self._prev.text.upper()) 7783 7784 this: t.Optional[exp.Expression] = None 7785 inner_expression: t.Optional[exp.Expression] = None 7786 7787 kind = self._curr and self._curr.text.upper() 7788 7789 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7790 this = self._parse_table_parts() 7791 elif self._match_text_seq("TABLES"): 7792 if self._match_set((TokenType.FROM, TokenType.IN)): 7793 kind = f"{kind} {self._prev.text.upper()}" 7794 this = self._parse_table(schema=True, is_db_reference=True) 7795 elif self._match_text_seq("DATABASE"): 7796 this = self._parse_table(schema=True, is_db_reference=True) 7797 elif self._match_text_seq("CLUSTER"): 7798 this = self._parse_table() 7799 # Try matching inner expr keywords before fallback to parse table. 
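# (Illustrative, assumed inputs for the branches of this dispatch:
#   ANALYZE TABLE t        -> kind="TABLE", this=t
#   ANALYZE TABLES IN db   -> kind="TABLES IN", this=db (a database reference)
#   ANALYZE t              -> kind=None, bare table name, Presto-style else branch below)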
7800 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7801 kind = None 7802 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7803 else: 7804 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7805 kind = None 7806 this = self._parse_table_parts() 7807 7808 partition = self._try_parse(self._parse_partition) 7809 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7810 return self._parse_as_command(start) 7811 7812 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7813 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7814 "WITH", "ASYNC", "MODE" 7815 ): 7816 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7817 else: 7818 mode = None 7819 7820 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7821 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7822 7823 properties = self._parse_properties() 7824 return self.expression( 7825 exp.Analyze, 7826 kind=kind, 7827 this=this, 7828 mode=mode, 7829 partition=partition, 7830 properties=properties, 7831 expression=inner_expression, 7832 options=options, 7833 ) 7834 7835 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7836 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7837 this = None 7838 kind = self._prev.text.upper() 7839 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7840 expressions = [] 7841 7842 if not self._match_text_seq("STATISTICS"): 7843 self.raise_error("Expecting token STATISTICS") 7844 7845 if self._match_text_seq("NOSCAN"): 7846 this = "NOSCAN" 7847 elif self._match(TokenType.FOR): 7848 if self._match_text_seq("ALL", "COLUMNS"): 7849 this = "FOR ALL COLUMNS" 7850 if self._match_texts("COLUMNS"): 7851 this = "FOR COLUMNS" 7852 expressions = self._parse_csv(self._parse_column_reference) 7853 elif self._match_text_seq("SAMPLE"): 7854 sample = self._parse_number() 7855 expressions = [ 7856 self.expression( 7857 exp.AnalyzeSample, 7858 sample=sample, 7859 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7860 ) 7861 ] 7862 7863 return self.expression( 7864 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7865 ) 7866 7867 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7868 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7869 kind = None 7870 this = None 7871 expression: t.Optional[exp.Expression] = None 7872 if self._match_text_seq("REF", "UPDATE"): 7873 kind = "REF" 7874 this = "UPDATE" 7875 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7876 this = "UPDATE SET DANGLING TO NULL" 7877 elif self._match_text_seq("STRUCTURE"): 7878 kind = "STRUCTURE" 7879 if self._match_text_seq("CASCADE", "FAST"): 7880 this = "CASCADE FAST" 7881 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7882 ("ONLINE", "OFFLINE") 7883 ): 7884 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7885 expression = self._parse_into() 7886 7887 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7888 7889 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7890 this = self._prev.text.upper() 7891 if self._match_text_seq("COLUMNS"): 7892 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7893 return None 7894 7895 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7896 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7897 if self._match_text_seq("STATISTICS"): 7898 return self.expression(exp.AnalyzeDelete, kind=kind) 7899 return None 7900 7901 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7902 if self._match_text_seq("CHAINED", "ROWS"): 7903 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7904 return None 7905 7906 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7907 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7908 this = self._prev.text.upper() 7909 expression: t.Optional[exp.Expression] = None 7910 expressions = [] 7911 update_options = None 7912 7913 if self._match_text_seq("HISTOGRAM", "ON"): 7914 expressions = self._parse_csv(self._parse_column_reference) 7915 with_expressions = [] 7916 while self._match(TokenType.WITH): 7917 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7918 if self._match_texts(("SYNC", "ASYNC")): 7919 if self._match_text_seq("MODE", advance=False): 7920 with_expressions.append(f"{self._prev.text.upper()} MODE") 7921 self._advance() 7922 else: 7923 buckets = self._parse_number() 7924 if self._match_text_seq("BUCKETS"): 7925 with_expressions.append(f"{buckets} BUCKETS") 7926 if with_expressions: 7927 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7928 7929 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7930 TokenType.UPDATE, advance=False 7931 ): 7932 update_options = self._prev.text.upper() 7933 self._advance() 7934 elif self._match_text_seq("USING", "DATA"): 7935 expression = self.expression(exp.UsingData, this=self._parse_string()) 7936 7937 return self.expression( 7938 exp.AnalyzeHistogram, 7939 this=this, 7940 expressions=expressions, 7941 expression=expression, 7942 update_options=update_options, 7943 ) 7944 7945 def _parse_merge(self) -> exp.Merge: 7946 self._match(TokenType.INTO) 7947 target = self._parse_table() 7948 7949 if target and self._match(TokenType.ALIAS, advance=False): 7950 target.set("alias", self._parse_table_alias()) 7951 7952 self._match(TokenType.USING) 7953 using = self._parse_table() 7954 7955 self._match(TokenType.ON) 7956 on = self._parse_assignment() 7957 7958 return self.expression( 7959 exp.Merge, 7960 this=target, 7961 using=using, 7962 on=on, 7963 whens=self._parse_when_matched(), 7964 returning=self._parse_returning(), 7965 ) 7966 7967 def _parse_when_matched(self) -> exp.Whens: 7968 whens = [] 7969 7970 while self._match(TokenType.WHEN): 7971 matched = not self._match(TokenType.NOT) 7972 self._match_text_seq("MATCHED") 7973 source = ( 7974 False 7975 if self._match_text_seq("BY", "TARGET") 7976 else self._match_text_seq("BY", "SOURCE") 7977 ) 7978 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7979 7980 self._match(TokenType.THEN) 7981 7982 if self._match(TokenType.INSERT): 7983 this = self._parse_star() 7984 if this: 7985 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7986 else: 7987 then = self.expression( 7988 exp.Insert, 7989 this=exp.var("ROW") 7990 if self._match_text_seq("ROW") 7991 else self._parse_value(values=False), 7992 expression=self._match_text_seq("VALUES") and self._parse_value(), 7993 ) 7994 elif self._match(TokenType.UPDATE): 7995 expressions = self._parse_star() 7996 if expressions: 7997 then = self.expression(exp.Update, expressions=expressions) 7998 else: 7999 then = self.expression( 8000 exp.Update, 8001 
expressions=self._match(TokenType.SET) 8002 and self._parse_csv(self._parse_equality), 8003 ) 8004 elif self._match(TokenType.DELETE): 8005 then = self.expression(exp.Var, this=self._prev.text) 8006 else: 8007 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8008 8009 whens.append( 8010 self.expression( 8011 exp.When, 8012 matched=matched, 8013 source=source, 8014 condition=condition, 8015 then=then, 8016 ) 8017 ) 8018 return self.expression(exp.Whens, expressions=whens) 8019 8020 def _parse_show(self) -> t.Optional[exp.Expression]: 8021 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8022 if parser: 8023 return parser(self) 8024 return self._parse_as_command(self._prev) 8025 8026 def _parse_set_item_assignment( 8027 self, kind: t.Optional[str] = None 8028 ) -> t.Optional[exp.Expression]: 8029 index = self._index 8030 8031 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8032 return self._parse_set_transaction(global_=kind == "GLOBAL") 8033 8034 left = self._parse_primary() or self._parse_column() 8035 assignment_delimiter = self._match_texts(("=", "TO")) 8036 8037 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8038 self._retreat(index) 8039 return None 8040 8041 right = self._parse_statement() or self._parse_id_var() 8042 if isinstance(right, (exp.Column, exp.Identifier)): 8043 right = exp.var(right.name) 8044 8045 this = self.expression(exp.EQ, this=left, expression=right) 8046 return self.expression(exp.SetItem, this=this, kind=kind) 8047 8048 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8049 self._match_text_seq("TRANSACTION") 8050 characteristics = self._parse_csv( 8051 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8052 ) 8053 return self.expression( 8054 exp.SetItem, 8055 expressions=characteristics, 8056 kind="TRANSACTION", 8057 **{"global": global_}, # type: ignore 8058 ) 8059 8060 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8061 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8062 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8063 8064 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8065 index = self._index 8066 set_ = self.expression( 8067 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8068 ) 8069 8070 if self._curr: 8071 self._retreat(index) 8072 return self._parse_as_command(self._prev) 8073 8074 return set_ 8075 8076 def _parse_var_from_options( 8077 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8078 ) -> t.Optional[exp.Var]: 8079 start = self._curr 8080 if not start: 8081 return None 8082 8083 option = start.text.upper() 8084 continuations = options.get(option) 8085 8086 index = self._index 8087 self._advance() 8088 for keywords in continuations or []: 8089 if isinstance(keywords, str): 8090 keywords = (keywords,) 8091 8092 if self._match_text_seq(*keywords): 8093 option = f"{option} {' '.join(keywords)}" 8094 break 8095 else: 8096 if continuations or continuations is None: 8097 if raise_unmatched: 8098 self.raise_error(f"Unknown option {option}") 8099 8100 self._retreat(index) 8101 return None 8102 8103 return exp.var(option) 8104 8105 def _parse_as_command(self, start: Token) -> exp.Command: 8106 while self._curr: 8107 self._advance() 8108 text = self._find_sql(start, self._prev) 8109 size = len(start.text) 8110 self._warn_unsupported() 8111 return exp.Command(this=text[:size], 
expression=text[size:]) 8112 8113 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8114 settings = [] 8115 8116 self._match_l_paren() 8117 kind = self._parse_id_var() 8118 8119 if self._match(TokenType.L_PAREN): 8120 while True: 8121 key = self._parse_id_var() 8122 value = self._parse_primary() 8123 if not key and value is None: 8124 break 8125 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8126 self._match(TokenType.R_PAREN) 8127 8128 self._match_r_paren() 8129 8130 return self.expression( 8131 exp.DictProperty, 8132 this=this, 8133 kind=kind.this if kind else None, 8134 settings=settings, 8135 ) 8136 8137 def _parse_dict_range(self, this: str) -> exp.DictRange: 8138 self._match_l_paren() 8139 has_min = self._match_text_seq("MIN") 8140 if has_min: 8141 min = self._parse_var() or self._parse_primary() 8142 self._match_text_seq("MAX") 8143 max = self._parse_var() or self._parse_primary() 8144 else: 8145 max = self._parse_var() or self._parse_primary() 8146 min = exp.Literal.number(0) 8147 self._match_r_paren() 8148 return self.expression(exp.DictRange, this=this, min=min, max=max) 8149 8150 def _parse_comprehension( 8151 self, this: t.Optional[exp.Expression] 8152 ) -> t.Optional[exp.Comprehension]: 8153 index = self._index 8154 expression = self._parse_column() 8155 if not self._match(TokenType.IN): 8156 self._retreat(index - 1) 8157 return None 8158 iterator = self._parse_column() 8159 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8160 return self.expression( 8161 exp.Comprehension, 8162 this=this, 8163 expression=expression, 8164 iterator=iterator, 8165 condition=condition, 8166 ) 8167 8168 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8169 if self._match(TokenType.HEREDOC_STRING): 8170 return self.expression(exp.Heredoc, this=self._prev.text) 8171 8172 if not self._match_text_seq("$"): 8173 return None 8174 8175 tags = ["$"] 8176 tag_text = None 8177 8178 if self._is_connected(): 8179 self._advance() 8180 tags.append(self._prev.text.upper()) 8181 else: 8182 self.raise_error("No closing $ found") 8183 8184 if tags[-1] != "$": 8185 if self._is_connected() and self._match_text_seq("$"): 8186 tag_text = tags[-1] 8187 tags.append("$") 8188 else: 8189 self.raise_error("No closing $ found") 8190 8191 heredoc_start = self._curr 8192 8193 while self._curr: 8194 if self._match_text_seq(*tags, advance=False): 8195 this = self._find_sql(heredoc_start, self._prev) 8196 self._advance(len(tags)) 8197 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8198 8199 self._advance() 8200 8201 self.raise_error(f"No closing {''.join(tags)} found") 8202 return None 8203 8204 def _find_parser( 8205 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8206 ) -> t.Optional[t.Callable]: 8207 if not self._curr: 8208 return None 8209 8210 index = self._index 8211 this = [] 8212 while True: 8213 # The current token might be multiple words 8214 curr = self._curr.text.upper() 8215 key = curr.split(" ") 8216 this.append(curr) 8217 8218 self._advance() 8219 result, trie = in_trie(trie, key) 8220 if result == TrieResult.FAILED: 8221 break 8222 8223 if result == TrieResult.EXISTS: 8224 subparser = parsers[" ".join(this)] 8225 return subparser 8226 8227 self._retreat(index) 8228 return None 8229 8230 def _match(self, token_type, advance=True, expression=None): 8231 if not self._curr: 8232 return None 8233 8234 if self._curr.token_type == token_type: 8235 if advance: 8236 self._advance() 8237 self._add_comments(expression) 8238 return 
True 8239 8240 return None 8241 8242 def _match_set(self, types, advance=True): 8243 if not self._curr: 8244 return None 8245 8246 if self._curr.token_type in types: 8247 if advance: 8248 self._advance() 8249 return True 8250 8251 return None 8252 8253 def _match_pair(self, token_type_a, token_type_b, advance=True): 8254 if not self._curr or not self._next: 8255 return None 8256 8257 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8258 if advance: 8259 self._advance(2) 8260 return True 8261 8262 return None 8263 8264 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8265 if not self._match(TokenType.L_PAREN, expression=expression): 8266 self.raise_error("Expecting (") 8267 8268 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8269 if not self._match(TokenType.R_PAREN, expression=expression): 8270 self.raise_error("Expecting )") 8271 8272 def _match_texts(self, texts, advance=True): 8273 if ( 8274 self._curr 8275 and self._curr.token_type != TokenType.STRING 8276 and self._curr.text.upper() in texts 8277 ): 8278 if advance: 8279 self._advance() 8280 return True 8281 return None 8282 8283 def _match_text_seq(self, *texts, advance=True): 8284 index = self._index 8285 for text in texts: 8286 if ( 8287 self._curr 8288 and self._curr.token_type != TokenType.STRING 8289 and self._curr.text.upper() == text 8290 ): 8291 self._advance() 8292 else: 8293 self._retreat(index) 8294 return None 8295 8296 if not advance: 8297 self._retreat(index) 8298 8299 return True 8300 8301 def _replace_lambda( 8302 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8303 ) -> t.Optional[exp.Expression]: 8304 if not node: 8305 return node 8306 8307 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8308 8309 for column in node.find_all(exp.Column): 8310 typ = lambda_types.get(column.parts[0].name) 8311 if typ is not None: 8312 dot_or_id = column.to_dot() if column.table else column.this 8313 8314 if typ: 8315 dot_or_id = self.expression( 8316 exp.Cast, 8317 this=dot_or_id, 8318 to=typ, 8319 ) 8320 8321 parent = column.parent 8322 8323 while isinstance(parent, exp.Dot): 8324 if not isinstance(parent.parent, exp.Dot): 8325 parent.replace(dot_or_id) 8326 break 8327 parent = parent.parent 8328 else: 8329 if column is node: 8330 node = dot_or_id 8331 else: 8332 column.replace(dot_or_id) 8333 return node 8334 8335 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8336 start = self._prev 8337 8338 # Not to be confused with TRUNCATE(number, decimals) function call 8339 if self._match(TokenType.L_PAREN): 8340 self._retreat(self._index - 2) 8341 return self._parse_function() 8342 8343 # Clickhouse supports TRUNCATE DATABASE as well 8344 is_database = self._match(TokenType.DATABASE) 8345 8346 self._match(TokenType.TABLE) 8347 8348 exists = self._parse_exists(not_=False) 8349 8350 expressions = self._parse_csv( 8351 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8352 ) 8353 8354 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8355 8356 if self._match_text_seq("RESTART", "IDENTITY"): 8357 identity = "RESTART" 8358 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8359 identity = "CONTINUE" 8360 else: 8361 identity = None 8362 8363 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8364 option = self._prev.text 8365 else: 8366 option = None 8367 8368 partition = self._parse_partition() 
8369 8370 # Fallback case 8371 if self._curr: 8372 return self._parse_as_command(start) 8373 8374 return self.expression( 8375 exp.TruncateTable, 8376 expressions=expressions, 8377 is_database=is_database, 8378 exists=exists, 8379 cluster=cluster, 8380 identity=identity, 8381 option=option, 8382 partition=partition, 8383 ) 8384 8385 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8386 this = self._parse_ordered(self._parse_opclass) 8387 8388 if not self._match(TokenType.WITH): 8389 return this 8390 8391 op = self._parse_var(any_token=True) 8392 8393 return self.expression(exp.WithOperator, this=this, op=op) 8394 8395 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8396 self._match(TokenType.EQ) 8397 self._match(TokenType.L_PAREN) 8398 8399 opts: t.List[t.Optional[exp.Expression]] = [] 8400 option: exp.Expression | None 8401 while self._curr and not self._match(TokenType.R_PAREN): 8402 if self._match_text_seq("FORMAT_NAME", "="): 8403 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8404 option = self._parse_format_name() 8405 else: 8406 option = self._parse_property() 8407 8408 if option is None: 8409 self.raise_error("Unable to parse option") 8410 break 8411 8412 opts.append(option) 8413 8414 return opts 8415 8416 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8417 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8418 8419 options = [] 8420 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8421 option = self._parse_var(any_token=True) 8422 prev = self._prev.text.upper() 8423 8424 # Different dialects might separate options and values by white space, "=" and "AS" 8425 self._match(TokenType.EQ) 8426 self._match(TokenType.ALIAS) 8427 8428 param = self.expression(exp.CopyParameter, this=option) 8429 8430 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8431 TokenType.L_PAREN, advance=False 8432 ): 8433 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8434 param.set("expressions", self._parse_wrapped_options()) 8435 elif prev == "FILE_FORMAT": 8436 # T-SQL's external file format case 8437 param.set("expression", self._parse_field()) 8438 else: 8439 param.set("expression", self._parse_unquoted_field()) 8440 8441 options.append(param) 8442 self._match(sep) 8443 8444 return options 8445 8446 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8447 expr = self.expression(exp.Credentials) 8448 8449 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8450 expr.set("storage", self._parse_field()) 8451 if self._match_text_seq("CREDENTIALS"): 8452 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8453 creds = ( 8454 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8455 ) 8456 expr.set("credentials", creds) 8457 if self._match_text_seq("ENCRYPTION"): 8458 expr.set("encryption", self._parse_wrapped_options()) 8459 if self._match_text_seq("IAM_ROLE"): 8460 expr.set("iam_role", self._parse_field()) 8461 if self._match_text_seq("REGION"): 8462 expr.set("region", self._parse_field()) 8463 8464 return expr 8465 8466 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8467 return self._parse_field() 8468 8469 def _parse_copy(self) -> exp.Copy | exp.Command: 8470 start = self._prev 8471 8472 self._match(TokenType.INTO) 8473 8474 this = ( 8475 self._parse_select(nested=True, parse_subquery_alias=False) 8476 if self._match(TokenType.L_PAREN, advance=False) 8477 else self._parse_table(schema=True) 
8478 ) 8479 8480 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8481 8482 files = self._parse_csv(self._parse_file_location) 8483 if self._match(TokenType.EQ, advance=False): 8484 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8485 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8486 # list via `_parse_wrapped(..)` below. 8487 self._advance(-1) 8488 files = [] 8489 8490 credentials = self._parse_credentials() 8491 8492 self._match_text_seq("WITH") 8493 8494 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8495 8496 # Fallback case 8497 if self._curr: 8498 return self._parse_as_command(start) 8499 8500 return self.expression( 8501 exp.Copy, 8502 this=this, 8503 kind=kind, 8504 credentials=credentials, 8505 files=files, 8506 params=params, 8507 ) 8508 8509 def _parse_normalize(self) -> exp.Normalize: 8510 return self.expression( 8511 exp.Normalize, 8512 this=self._parse_bitwise(), 8513 form=self._match(TokenType.COMMA) and self._parse_var(), 8514 ) 8515 8516 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8517 args = self._parse_csv(lambda: self._parse_lambda()) 8518 8519 this = seq_get(args, 0) 8520 decimals = seq_get(args, 1) 8521 8522 return expr_type( 8523 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8524 ) 8525 8526 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8527 star_token = self._prev 8528 8529 if self._match_text_seq("COLUMNS", "(", advance=False): 8530 this = self._parse_function() 8531 if isinstance(this, exp.Columns): 8532 this.set("unpack", True) 8533 return this 8534 8535 return self.expression( 8536 exp.Star, 8537 **{ # type: ignore 8538 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8539 "replace": self._parse_star_op("REPLACE"), 8540 "rename": self._parse_star_op("RENAME"), 8541 }, 8542 ).update_positions(star_token) 8543 8544 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8545 privilege_parts = [] 8546 8547 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8548 # (end of privilege list) or L_PAREN (start of column list) are met 8549 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8550 privilege_parts.append(self._curr.text.upper()) 8551 self._advance() 8552 8553 this = exp.var(" ".join(privilege_parts)) 8554 expressions = ( 8555 self._parse_wrapped_csv(self._parse_column) 8556 if self._match(TokenType.L_PAREN, advance=False) 8557 else None 8558 ) 8559 8560 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8561 8562 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8563 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8564 principal = self._parse_id_var() 8565 8566 if not principal: 8567 return None 8568 8569 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8570 8571 def _parse_grant_revoke_common( 8572 self, 8573 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8574 privileges = self._parse_csv(self._parse_grant_privilege) 8575 8576 self._match(TokenType.ON) 8577 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8578 8579 # Attempt to parse the securable e.g. 
MySQL allows names 8580 # such as "foo.*", "*.*" which are not easily parseable yet 8581 securable = self._try_parse(self._parse_table_parts) 8582 8583 return privileges, kind, securable 8584 8585 def _parse_grant(self) -> exp.Grant | exp.Command: 8586 start = self._prev 8587 8588 privileges, kind, securable = self._parse_grant_revoke_common() 8589 8590 if not securable or not self._match_text_seq("TO"): 8591 return self._parse_as_command(start) 8592 8593 principals = self._parse_csv(self._parse_grant_principal) 8594 8595 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8596 8597 if self._curr: 8598 return self._parse_as_command(start) 8599 8600 return self.expression( 8601 exp.Grant, 8602 privileges=privileges, 8603 kind=kind, 8604 securable=securable, 8605 principals=principals, 8606 grant_option=grant_option, 8607 ) 8608 8609 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8610 start = self._prev 8611 8612 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8613 8614 privileges, kind, securable = self._parse_grant_revoke_common() 8615 8616 if not securable or not self._match_text_seq("FROM"): 8617 return self._parse_as_command(start) 8618 8619 principals = self._parse_csv(self._parse_grant_principal) 8620 8621 cascade = None 8622 if self._match_texts(("CASCADE", "RESTRICT")): 8623 cascade = self._prev.text.upper() 8624 8625 if self._curr: 8626 return self._parse_as_command(start) 8627 8628 return self.expression( 8629 exp.Revoke, 8630 privileges=privileges, 8631 kind=kind, 8632 securable=securable, 8633 principals=principals, 8634 grant_option=grant_option, 8635 cascade=cascade, 8636 ) 8637 8638 def _parse_overlay(self) -> exp.Overlay: 8639 return self.expression( 8640 exp.Overlay, 8641 **{ # type: ignore 8642 "this": self._parse_bitwise(), 8643 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8644 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8645 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8646 }, 8647 ) 8648 8649 def _parse_format_name(self) -> exp.Property: 8650 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8651 # for FILE_FORMAT = <format_name> 8652 return self.expression( 8653 exp.Property, 8654 this=exp.var("FORMAT_NAME"), 8655 value=self._parse_string() or self._parse_table_parts(), 8656 ) 8657 8658 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8659 args: t.List[exp.Expression] = [] 8660 8661 if self._match(TokenType.DISTINCT): 8662 args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()])) 8663 self._match(TokenType.COMMA) 8664 8665 args.extend(self._parse_function_args()) 8666 8667 return self.expression( 8668 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8669 ) 8670 8671 def _identifier_expression( 8672 self, token: t.Optional[Token] = None, **kwargs: t.Any 8673 ) -> exp.Identifier: 8674 token = token or self._prev 8675 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8676 expression.update_positions(token) 8677 return expression 8678 8679 def _build_pipe_cte( 8680 self, 8681 query: exp.Query, 8682 expressions: t.List[exp.Expression], 8683 alias_cte: t.Optional[exp.TableAlias] = None, 8684 ) -> exp.Select: 8685 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8686 if alias_cte: 8687 new_cte = alias_cte 8688 else: 8689 self._pipe_cte_counter += 1 8690 new_cte = f"__tmp{self._pipe_cte_counter}" 8691 8692 with_ = query.args.get("with") 8693 
ctes = with_.pop() if with_ else None 8694 8695 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8696 if ctes: 8697 new_select.set("with", ctes) 8698 8699 return new_select.with_(new_cte, as_=query, copy=False) 8700 8701 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8702 select = self._parse_select(consume_pipe=False) 8703 if not select: 8704 return query 8705 8706 return self._build_pipe_cte( 8707 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8708 ) 8709 8710 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8711 limit = self._parse_limit() 8712 offset = self._parse_offset() 8713 if limit: 8714 curr_limit = query.args.get("limit", limit) 8715 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8716 query.limit(limit, copy=False) 8717 if offset: 8718 curr_offset = query.args.get("offset") 8719 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8720 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8721 8722 return query 8723 8724 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8725 this = self._parse_assignment() 8726 if self._match_text_seq("GROUP", "AND", advance=False): 8727 return this 8728 8729 this = self._parse_alias(this) 8730 8731 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8732 return self._parse_ordered(lambda: this) 8733 8734 return this 8735 8736 def _parse_pipe_syntax_aggregate_group_order_by( 8737 self, query: exp.Select, group_by_exists: bool = True 8738 ) -> exp.Select: 8739 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8740 aggregates_or_groups, orders = [], [] 8741 for element in expr: 8742 if isinstance(element, exp.Ordered): 8743 this = element.this 8744 if isinstance(this, exp.Alias): 8745 element.set("this", this.args["alias"]) 8746 orders.append(element) 8747 else: 8748 this = element 8749 aggregates_or_groups.append(this) 8750 8751 if group_by_exists: 8752 query.select(*aggregates_or_groups, copy=False).group_by( 8753 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8754 copy=False, 8755 ) 8756 else: 8757 query.select(*aggregates_or_groups, append=False, copy=False) 8758 8759 if orders: 8760 return query.order_by(*orders, append=False, copy=False) 8761 8762 return query 8763 8764 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8765 self._match_text_seq("AGGREGATE") 8766 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8767 8768 if self._match(TokenType.GROUP_BY) or ( 8769 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8770 ): 8771 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8772 8773 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8774 8775 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8776 first_setop = self.parse_set_operation(this=query) 8777 if not first_setop: 8778 return None 8779 8780 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8781 expr = self._parse_paren() 8782 return expr.assert_is(exp.Subquery).unnest() if expr else None 8783 8784 first_setop.this.pop() 8785 8786 setops = [ 8787 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8788 *self._parse_csv(_parse_and_unwrap_query), 8789 ] 8790 8791 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8792 with_ = query.args.get("with") 
8793 ctes = with_.pop() if with_ else None 8794 8795 if isinstance(first_setop, exp.Union): 8796 query = query.union(*setops, copy=False, **first_setop.args) 8797 elif isinstance(first_setop, exp.Except): 8798 query = query.except_(*setops, copy=False, **first_setop.args) 8799 else: 8800 query = query.intersect(*setops, copy=False, **first_setop.args) 8801 8802 query.set("with", ctes) 8803 8804 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8805 8806 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8807 join = self._parse_join() 8808 if not join: 8809 return None 8810 8811 if isinstance(query, exp.Select): 8812 return query.join(join, copy=False) 8813 8814 return query 8815 8816 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8817 pivots = self._parse_pivots() 8818 if not pivots: 8819 return query 8820 8821 from_ = query.args.get("from") 8822 if from_: 8823 from_.this.set("pivots", pivots) 8824 8825 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8826 8827 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8828 self._match_text_seq("EXTEND") 8829 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8830 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8831 8832 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8833 sample = self._parse_table_sample() 8834 8835 with_ = query.args.get("with") 8836 if with_: 8837 with_.expressions[-1].this.set("sample", sample) 8838 else: 8839 query.set("sample", sample) 8840 8841 return query 8842 8843 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8844 if isinstance(query, exp.Subquery): 8845 query = exp.select("*").from_(query, copy=False) 8846 8847 if not query.args.get("from"): 8848 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8849 8850 while self._match(TokenType.PIPE_GT): 8851 start = self._curr 8852 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8853 if not parser: 8854 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8855 # keywords, making it tricky to disambiguate them without lookahead. The approach 8856 # here is to try and parse a set operation and if that fails, then try to parse a 8857 # join operator. If that fails as well, then the operator is not supported. 
8858 parsed_query = self._parse_pipe_syntax_set_operator(query) 8859 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8860 if not parsed_query: 8861 self._retreat(start) 8862 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8863 break 8864 query = parsed_query 8865 else: 8866 query = parser(self, query) 8867 8868 return query 8869 8870 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8871 vars = self._parse_csv(self._parse_id_var) 8872 if not vars: 8873 return None 8874 8875 return self.expression( 8876 exp.DeclareItem, 8877 this=vars, 8878 kind=self._parse_types(), 8879 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8880 ) 8881 8882 def _parse_declare(self) -> exp.Declare | exp.Command: 8883 start = self._prev 8884 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8885 8886 if not expressions or self._curr: 8887 return self._parse_as_command(start) 8888 8889 return self.expression(exp.Declare, expressions=expressions) 8890 8891 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8892 exp_class = exp.Cast if strict else exp.TryCast 8893 8894 if exp_class == exp.TryCast: 8895 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8896 8897 return self.expression(exp_class, **kwargs) 8898 8899 def _parse_json_value(self) -> exp.JSONValue: 8900 this = self._parse_bitwise() 8901 self._match(TokenType.COMMA) 8902 path = self._parse_bitwise() 8903 8904 returning = self._match(TokenType.RETURNING) and self._parse_type() 8905 8906 return self.expression( 8907 exp.JSONValue, 8908 this=this, 8909 path=self.dialect.to_json_path(path), 8910 returning=returning, 8911 on_condition=self._parse_on_condition(), 8912 ) 8913 8914 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8915 def concat_exprs( 8916 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8917 ) -> exp.Expression: 8918 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8919 concat_exprs = [ 8920 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8921 ] 8922 node.set("expressions", concat_exprs) 8923 return node 8924 if len(exprs) == 1: 8925 return exprs[0] 8926 return self.expression(exp.Concat, expressions=args, safe=True) 8927 8928 args = self._parse_csv(self._parse_lambda) 8929 8930 if args: 8931 order = args[-1] if isinstance(args[-1], exp.Order) else None 8932 8933 if order: 8934 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8935 # remove 'expr' from exp.Order and add it back to args 8936 args[-1] = order.this 8937 order.set("this", concat_exprs(order.this, args)) 8938 8939 this = order or concat_exprs(args[0], args) 8940 else: 8941 this = None 8942 8943 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8944 8945 return self.expression(exp.GroupConcat, this=this, separator=separator)
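The _parse_pipe_syntax_* helpers above implement pipe-syntax queries (the |> operator): _parse_pipe_syntax_query dispatches on the keyword that follows each |>, falling back to set-operation and then join parsing, while _build_pipe_cte folds the query built so far into a generated CTE so that successive stages compose. A minimal sketch of the observable behavior through the public API, assuming a sqlglot build and dialect (e.g. BigQuery) with pipe-syntax support; the generated CTE names such as __tmp1 are internal details and may differ between versions:

    import sqlglot

    # Each |> stage is rewritten into a SELECT over a chained CTE.
    ast = sqlglot.parse_one("FROM t |> WHERE x > 0 |> SELECT x", read="bigquery")
    print(ast.sql("bigquery"))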
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
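A short usage sketch tying these options together (a minimal example on the default dialect; the tokenize-then-parse split mirrors how the higher-level sqlglot entry points drive this class):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Tokenize first, then hand the token stream to the parser.
    tokens = Tokenizer().tokenize("SELECT 1")
    expressions = Parser(error_level=ErrorLevel.RAISE).parse(tokens)
    print(expressions[0].sql())  # SELECT 1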
1584 def __init__( 1585 self, 1586 error_level: t.Optional[ErrorLevel] = None, 1587 error_message_context: int = 100, 1588 max_errors: int = 3, 1589 dialect: DialectType = None, 1590 ): 1591 from sqlglot.dialects import Dialect 1592 1593 self.error_level = error_level or ErrorLevel.IMMEDIATE 1594 self.error_message_context = error_message_context 1595 self.max_errors = max_errors 1596 self.dialect = Dialect.get_or_raise(dialect) 1597 self.reset()
1610 def parse( 1611 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1612 ) -> t.List[t.Optional[exp.Expression]]: 1613 """ 1614 Parses a list of tokens and returns a list of syntax trees, one tree 1615 per parsed SQL statement. 1616 1617 Args: 1618 raw_tokens: The list of tokens. 1619 sql: The original SQL string, used to produce helpful debug messages. 1620 1621 Returns: 1622 The list of the produced syntax trees. 1623 """ 1624 return self._parse( 1625 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1626 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
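For instance, semicolon-separated statements each produce their own tree (a minimal sketch using the default dialect):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    trees = Parser().parse(tokens)
    print([tree.sql() for tree in trees])  # ['SELECT 1', 'SELECT 2']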
1628 def parse_into( 1629 self, 1630 expression_types: exp.IntoType, 1631 raw_tokens: t.List[Token], 1632 sql: t.Optional[str] = None, 1633 ) -> t.List[t.Optional[exp.Expression]]: 1634 """ 1635 Parses a list of tokens into a given Expression type. If a collection of Expression 1636 types is given instead, this method will try to parse the token list into each one 1637 of them, stopping at the first for which the parsing succeeds. 1638 1639 Args: 1640 expression_types: The expression type(s) to try and parse the token list into. 1641 raw_tokens: The list of tokens. 1642 sql: The original SQL string, used to produce helpful debug messages. 1643 1644 Returns: 1645 The target Expression. 1646 """ 1647 errors = [] 1648 for expression_type in ensure_list(expression_types): 1649 parser = self.EXPRESSION_PARSERS.get(expression_type) 1650 if not parser: 1651 raise TypeError(f"No parser registered for {expression_type}") 1652 1653 try: 1654 return self._parse(parser, raw_tokens, sql) 1655 except ParseError as e: 1656 e.errors[0]["into_expression"] = expression_type 1657 errors.append(e) 1658 1659 raise ParseError( 1660 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1661 errors=merge_errors(errors), 1662 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
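A sketch of targeted parsing, assuming exp.Table is among the registered EXPRESSION_PARSERS keys (it is in current releases; the exact key set may vary by version):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Parse a dotted name directly into an exp.Table rather than a full statement.
    tokens = Tokenizer().tokenize("db.schema.tbl")
    table = Parser().parse_into(exp.Table, tokens)[0]
    print(type(table).__name__, table.sql())  # Table db.schema.tbl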
1702 def check_errors(self) -> None: 1703 """Logs or raises any found errors, depending on the chosen error level setting.""" 1704 if self.error_level == ErrorLevel.WARN: 1705 for error in self.errors: 1706 logger.error(str(error)) 1707 elif self.error_level == ErrorLevel.RAISE and self.errors: 1708 raise ParseError( 1709 concat_messages(self.errors, self.max_errors), 1710 errors=merge_errors(self.errors), 1711 )
Logs or raises any found errors, depending on the chosen error level setting.
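Under ErrorLevel.RAISE the parser keeps collecting errors and check_errors merges them into a single ParseError at the end; under ErrorLevel.WARN they are only logged. A minimal sketch (the malformed query is an arbitrary example):

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)
    try:
        parser.parse(Tokenizer().tokenize("SELECT foo( FROM bar"))
    except ParseError as e:
        # The merged error keeps every recorded entry; max_errors only caps
        # how many are concatenated into the message.
        print(e.errors[0]["description"])  # e.g. "Expecting )"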
1713 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1714 """ 1715 Appends an error in the list of recorded errors or raises it, depending on the chosen 1716 error level setting. 1717 """ 1718 token = token or self._curr or self._prev or Token.string("") 1719 start = token.start 1720 end = token.end + 1 1721 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1722 highlight = self.sql[start:end] 1723 end_context = self.sql[end : end + self.error_message_context] 1724 1725 error = ParseError.new( 1726 f"{message}. Line {token.line}, Col: {token.col}.\n" 1727 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1728 description=message, 1729 line=token.line, 1730 col=token.col, 1731 start_context=start_context, 1732 highlight=highlight, 1733 end_context=end_context, 1734 ) 1735 1736 if self.error_level == ErrorLevel.IMMEDIATE: 1737 raise error 1738 1739 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
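Because each error carries structured fields (line, col, highlight, surrounding context), the failure location can be recovered programmatically. A sketch with the default ErrorLevel.IMMEDIATE, where the first error raises right away; passing sql= lets the parser slice the context windows out of the original string:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT foo( FROM bar"  # arbitrary malformed input
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        err = e.errors[0]
        print(err["line"], err["col"], err["highlight"])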
1741 def expression( 1742 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1743 ) -> E: 1744 """ 1745 Creates a new, validated Expression. 1746 1747 Args: 1748 exp_class: The expression class to instantiate. 1749 comments: An optional list of comments to attach to the expression. 1750 kwargs: The arguments to set for the expression along with their respective values. 1751 1752 Returns: 1753 The target expression. 1754 """ 1755 instance = exp_class(**kwargs) 1756 instance.add_comments(comments) if comments else self._add_comments(instance) 1757 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
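In other words, expression() is a checked constructor: it instantiates the node, attaches any pending comments, and validates mandatory arguments in one step. A small sketch using public expression helpers:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # Builds and validates an exp.Cast node in one call.
    cast = parser.expression(exp.Cast, this=exp.column("x"), to=exp.DataType.build("INT"))
    print(cast.sql())  # CAST(x AS INT)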
1764 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1765 """ 1766 Validates an Expression, making sure that all its mandatory arguments are set. 1767 1768 Args: 1769 expression: The expression to validate. 1770 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1771 1772 Returns: 1773 The validated expression. 1774 """ 1775 if self.error_level != ErrorLevel.IGNORE: 1776 for error_message in expression.error_messages(args): 1777 self.raise_error(error_message) 1778 1779 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
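A sketch of how error_level changes the outcome when a mandatory argument is missing (exp.Cast requires a "to" type, so leaving it unset trips validation):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    invalid = exp.Cast(this=exp.column("x"))  # required 'to' arg is missing

    try:
        Parser().validate_expression(invalid)  # default IMMEDIATE: raises on the spot
    except ParseError as e:
        print(e.errors[0]["description"])

    # With IGNORE, validation is skipped and the node is returned unchanged.
    Parser(error_level=ErrorLevel.IGNORE).validate_expression(invalid)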
4879 def parse_set_operation( 4880 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4881 ) -> t.Optional[exp.Expression]: 4882 start = self._index 4883 _, side_token, kind_token = self._parse_join_parts() 4884 4885 side = side_token.text if side_token else None 4886 kind = kind_token.text if kind_token else None 4887 4888 if not self._match_set(self.SET_OPERATIONS): 4889 self._retreat(start) 4890 return None 4891 4892 token_type = self._prev.token_type 4893 4894 if token_type == TokenType.UNION: 4895 operation: t.Type[exp.SetOperation] = exp.Union 4896 elif token_type == TokenType.EXCEPT: 4897 operation = exp.Except 4898 else: 4899 operation = exp.Intersect 4900 4901 comments = self._prev.comments 4902 4903 if self._match(TokenType.DISTINCT): 4904 distinct: t.Optional[bool] = True 4905 elif self._match(TokenType.ALL): 4906 distinct = False 4907 else: 4908 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4909 if distinct is None: 4910 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4911 4912 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4913 "STRICT", "CORRESPONDING" 4914 ) 4915 if self._match_text_seq("CORRESPONDING"): 4916 by_name = True 4917 if not side and not kind: 4918 kind = "INNER" 4919 4920 on_column_list = None 4921 if by_name and self._match_texts(("ON", "BY")): 4922 on_column_list = self._parse_wrapped_csv(self._parse_column) 4923 4924 expression = self._parse_select( 4925 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4926 ) 4927 4928 return self.expression( 4929 operation, 4930 comments=comments, 4931 this=this, 4932 distinct=distinct, 4933 by_name=by_name, 4934 expression=expression, 4935 side=side, 4936 kind=kind, 4937 on=on_column_list, 4938 )
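For example, the DISTINCT/ALL modifier surfaces as the distinct arg on the resulting set-operation node (a sketch via the public API; when neither keyword is given, SET_OP_DISTINCT_BY_DEFAULT decides per dialect):

    import sqlglot

    union = sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")
    print(type(union).__name__)    # Union
    print(union.args["distinct"])  # False, because ALL was specified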