sqlglot.parser
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
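
# Illustrative sketch (not part of the library): build_var_map pairs up a flat
# VAR_MAP argument list into keys and values, falling back to exp.StarMap when the
# single argument is a star. Roughly:
#
#   >>> args = [exp.Literal.string("a"), exp.Literal.number(1)]
#   >>> build_var_map(args)  # keys=['a'], values=[1]
#   VarMap(keys=Array(...), values=Array(...))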


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
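
# Illustrative sketch (not part of the library): build_mod parenthesizes binary
# operands so operator precedence survives the MOD -> % rewrite, e.g.
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("MOD(a + 1, 7)").sql()
#   '(a + 1) % 7'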


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
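
    # Illustrative sketch (not part of the library): constructing a Parser directly;
    # sqlglot.parse/parse_one are the usual entry points and wire this up for you.
    #
    #   >>> from sqlglot.errors import ErrorLevel
    #   >>> parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)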

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
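
    # Illustrative sketch (not part of the library): COLUMN_OPERATORS drives what the
    # token following a column parses into, e.g. `x::INT` becomes exp.Cast (or
    # exp.TryCast when STRICT_CAST is False), and `x -> '$.y'` becomes exp.JSONExtract
    # with a dialect-normalized JSON path.
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT x::INT").find(exp.Cast)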

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
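
    # Illustrative sketch (not part of the library): RANGE_PARSERS dispatches on the
    # token that follows an operand, so BETWEEN and IN get dedicated parsers while
    # LIKE/GLOB/RLIKE go through binary_range_parser:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("x BETWEEN 1 AND 10")
    #   Between(this=Column(...), low=Literal(...), high=Literal(...))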

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
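
    # Illustrative sketch (not part of the library): per the in-method comment above,
    # both argument orders canonicalize to (column, value):
    #
    #   PARTITIONED BY (bucket(4, col))  -- Hive-style, literal first
    #   PARTITIONED BY (bucket(col, 4))  -- Trino-style, column first
    #
    # each yields exp.PartitionedByBucket(this=col, expression=4).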

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False
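
    # Illustrative sketch (not part of the library): dialect parsers tune behavior by
    # overriding these class-level flags, along the lines of
    #
    #   class MyDialectParser(Parser):
    #       LOG_DEFAULTS_TO_LN = True  # single-arg LOG(x) parses as LN(x)
    #       STRING_ALIASES = True      # allow SELECT COUNT(*) 'count'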

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
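
    # Illustrative sketch (not part of the library): driving parse() by hand with a
    # dialect's tokenizer; sqlglot.parse("...") does the equivalent for you.
    #
    #   >>> from sqlglot.dialects.dialect import Dialect
    #   >>> dialect = Dialect.get_or_raise("duckdb")
    #   >>> sql = "SELECT 1; SELECT 2"
    #   >>> dialect.parser().parse(dialect.tokenize(sql), sql)
    #   [Select(...), Select(...)]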
"RELY"), tuple()), 1369 } 1370 1371 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1372 "NO": ("OTHERS",), 1373 "CURRENT": ("ROW",), 1374 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1375 } 1376 1377 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1378 1379 CLONE_KEYWORDS = {"CLONE", "COPY"} 1380 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1381 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1382 1383 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1384 1385 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1386 1387 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1388 1389 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1390 1391 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1392 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1393 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1394 1395 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1396 1397 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1398 1399 ADD_CONSTRAINT_TOKENS = { 1400 TokenType.CONSTRAINT, 1401 TokenType.FOREIGN_KEY, 1402 TokenType.INDEX, 1403 TokenType.KEY, 1404 TokenType.PRIMARY_KEY, 1405 TokenType.UNIQUE, 1406 } 1407 1408 DISTINCT_TOKENS = {TokenType.DISTINCT} 1409 1410 NULL_TOKENS = {TokenType.NULL} 1411 1412 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1413 1414 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1415 1416 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1417 1418 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1419 1420 ODBC_DATETIME_LITERALS = { 1421 "d": exp.Date, 1422 "t": exp.Time, 1423 "ts": exp.Timestamp, 1424 } 1425 1426 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1427 1428 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1429 1430 # The style options for the DESCRIBE statement 1431 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1432 1433 # The style options for the ANALYZE statement 1434 ANALYZE_STYLES = { 1435 "BUFFER_USAGE_LIMIT", 1436 "FULL", 1437 "LOCAL", 1438 "NO_WRITE_TO_BINLOG", 1439 "SAMPLE", 1440 "SKIP_LOCKED", 1441 "VERBOSE", 1442 } 1443 1444 ANALYZE_EXPRESSION_PARSERS = { 1445 "ALL": lambda self: self._parse_analyze_columns(), 1446 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1447 "DELETE": lambda self: self._parse_analyze_delete(), 1448 "DROP": lambda self: self._parse_analyze_histogram(), 1449 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1450 "LIST": lambda self: self._parse_analyze_list(), 1451 "PREDICATE": lambda self: self._parse_analyze_columns(), 1452 "UPDATE": lambda self: self._parse_analyze_histogram(), 1453 "VALIDATE": lambda self: self._parse_analyze_validate(), 1454 } 1455 1456 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1457 1458 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1459 1460 OPERATION_MODIFIERS: t.Set[str] = set() 1461 1462 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1463 1464 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1465 1466 STRICT_CAST = True 1467 1468 PREFIXED_PIVOT_COLUMNS = False 1469 IDENTIFY_PIVOT_STRINGS = False 1470 1471 LOG_DEFAULTS_TO_LN = False 1472 1473 # Whether the table sample clause expects CSV syntax 1474 TABLESAMPLE_CSV = False 1475 1476 # The default method used for table sampling 1477 

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
1614 """ 1615 errors = [] 1616 for expression_type in ensure_list(expression_types): 1617 parser = self.EXPRESSION_PARSERS.get(expression_type) 1618 if not parser: 1619 raise TypeError(f"No parser registered for {expression_type}") 1620 1621 try: 1622 return self._parse(parser, raw_tokens, sql) 1623 except ParseError as e: 1624 e.errors[0]["into_expression"] = expression_type 1625 errors.append(e) 1626 1627 raise ParseError( 1628 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1629 errors=merge_errors(errors), 1630 ) from errors[-1] 1631 1632 def _parse( 1633 self, 1634 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1635 raw_tokens: t.List[Token], 1636 sql: t.Optional[str] = None, 1637 ) -> t.List[t.Optional[exp.Expression]]: 1638 self.reset() 1639 self.sql = sql or "" 1640 1641 total = len(raw_tokens) 1642 chunks: t.List[t.List[Token]] = [[]] 1643 1644 for i, token in enumerate(raw_tokens): 1645 if token.token_type == TokenType.SEMICOLON: 1646 if token.comments: 1647 chunks.append([token]) 1648 1649 if i < total - 1: 1650 chunks.append([]) 1651 else: 1652 chunks[-1].append(token) 1653 1654 expressions = [] 1655 1656 for tokens in chunks: 1657 self._index = -1 1658 self._tokens = tokens 1659 self._advance() 1660 1661 expressions.append(parse_method(self)) 1662 1663 if self._index < len(self._tokens): 1664 self.raise_error("Invalid expression / Unexpected token") 1665 1666 self.check_errors() 1667 1668 return expressions 1669 1670 def check_errors(self) -> None: 1671 """Logs or raises any found errors, depending on the chosen error level setting.""" 1672 if self.error_level == ErrorLevel.WARN: 1673 for error in self.errors: 1674 logger.error(str(error)) 1675 elif self.error_level == ErrorLevel.RAISE and self.errors: 1676 raise ParseError( 1677 concat_messages(self.errors, self.max_errors), 1678 errors=merge_errors(self.errors), 1679 ) 1680 1681 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1682 """ 1683 Appends an error in the list of recorded errors or raises it, depending on the chosen 1684 error level setting. 1685 """ 1686 token = token or self._curr or self._prev or Token.string("") 1687 start = token.start 1688 end = token.end + 1 1689 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1690 highlight = self.sql[start:end] 1691 end_context = self.sql[end : end + self.error_message_context] 1692 1693 error = ParseError.new( 1694 f"{message}. Line {token.line}, Col: {token.col}.\n" 1695 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1696 description=message, 1697 line=token.line, 1698 col=token.col, 1699 start_context=start_context, 1700 highlight=highlight, 1701 end_context=end_context, 1702 ) 1703 1704 if self.error_level == ErrorLevel.IMMEDIATE: 1705 raise error 1706 1707 self.errors.append(error) 1708 1709 def expression( 1710 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1711 ) -> E: 1712 """ 1713 Creates a new, validated Expression. 1714 1715 Args: 1716 exp_class: The expression class to instantiate. 1717 comments: An optional list of comments to attach to the expression. 1718 kwargs: The arguments to set for the expression along with their respective values. 1719 1720 Returns: 1721 The target expression. 
1722 """ 1723 instance = exp_class(**kwargs) 1724 instance.add_comments(comments) if comments else self._add_comments(instance) 1725 return self.validate_expression(instance) 1726 1727 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1728 if expression and self._prev_comments: 1729 expression.add_comments(self._prev_comments) 1730 self._prev_comments = None 1731 1732 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1733 """ 1734 Validates an Expression, making sure that all its mandatory arguments are set. 1735 1736 Args: 1737 expression: The expression to validate. 1738 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1739 1740 Returns: 1741 The validated expression. 1742 """ 1743 if self.error_level != ErrorLevel.IGNORE: 1744 for error_message in expression.error_messages(args): 1745 self.raise_error(error_message) 1746 1747 return expression 1748 1749 def _find_sql(self, start: Token, end: Token) -> str: 1750 return self.sql[start.start : end.end + 1] 1751 1752 def _is_connected(self) -> bool: 1753 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1754 1755 def _advance(self, times: int = 1) -> None: 1756 self._index += times 1757 self._curr = seq_get(self._tokens, self._index) 1758 self._next = seq_get(self._tokens, self._index + 1) 1759 1760 if self._index > 0: 1761 self._prev = self._tokens[self._index - 1] 1762 self._prev_comments = self._prev.comments 1763 else: 1764 self._prev = None 1765 self._prev_comments = None 1766 1767 def _retreat(self, index: int) -> None: 1768 if index != self._index: 1769 self._advance(index - self._index) 1770 1771 def _warn_unsupported(self) -> None: 1772 if len(self._tokens) <= 1: 1773 return 1774 1775 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1776 # interested in emitting a warning for the one being currently processed. 1777 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1778 1779 logger.warning( 1780 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1781 ) 1782 1783 def _parse_command(self) -> exp.Command: 1784 self._warn_unsupported() 1785 return self.expression( 1786 exp.Command, 1787 comments=self._prev_comments, 1788 this=self._prev.text.upper(), 1789 expression=self._parse_string(), 1790 ) 1791 1792 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1793 """ 1794 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1795 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1796 solve this by setting & resetting the parser state accordingly 1797 """ 1798 index = self._index 1799 error_level = self.error_level 1800 1801 self.error_level = ErrorLevel.IMMEDIATE 1802 try: 1803 this = parse_method() 1804 except ParseError: 1805 this = None 1806 finally: 1807 if not this or retreat: 1808 self._retreat(index) 1809 self.error_level = error_level 1810 1811 return this 1812 1813 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1814 start = self._prev 1815 exists = self._parse_exists() if allow_exists else None 1816 1817 self._match(TokenType.ON) 1818 1819 materialized = self._match_text_seq("MATERIALIZED") 1820 kind = self._match_set(self.CREATABLES) and self._prev 1821 if not kind: 1822 return self._parse_as_command(start) 1823 1824 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1825 this = self._parse_user_defined_function(kind=kind.token_type) 1826 elif kind.token_type == TokenType.TABLE: 1827 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1828 elif kind.token_type == TokenType.COLUMN: 1829 this = self._parse_column() 1830 else: 1831 this = self._parse_id_var() 1832 1833 self._match(TokenType.IS) 1834 1835 return self.expression( 1836 exp.Comment, 1837 this=this, 1838 kind=kind.text, 1839 expression=self._parse_string(), 1840 exists=exists, 1841 materialized=materialized, 1842 ) 1843 1844 def _parse_to_table( 1845 self, 1846 ) -> exp.ToTableProperty: 1847 table = self._parse_table_parts(schema=True) 1848 return self.expression(exp.ToTableProperty, this=table) 1849 1850 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1851 def _parse_ttl(self) -> exp.Expression: 1852 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1853 this = self._parse_bitwise() 1854 1855 if self._match_text_seq("DELETE"): 1856 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1857 if self._match_text_seq("RECOMPRESS"): 1858 return self.expression( 1859 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1860 ) 1861 if self._match_text_seq("TO", "DISK"): 1862 return self.expression( 1863 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1864 ) 1865 if self._match_text_seq("TO", "VOLUME"): 1866 return self.expression( 1867 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1868 ) 1869 1870 return this 1871 1872 expressions = self._parse_csv(_parse_ttl_action) 1873 where = self._parse_where() 1874 group = self._parse_group() 1875 1876 aggregates = None 1877 if group and self._match(TokenType.SET): 1878 aggregates = self._parse_csv(self._parse_set_item) 1879 1880 return self.expression( 1881 exp.MergeTreeTTL, 1882 expressions=expressions, 1883 where=where, 1884 group=group, 1885 aggregates=aggregates, 1886 ) 1887 1888 def _parse_statement(self) -> t.Optional[exp.Expression]: 1889 if self._curr is None: 1890 return None 1891 1892 if self._match_set(self.STATEMENT_PARSERS): 1893 comments = self._prev_comments 1894 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1895 stmt.add_comments(comments, prepend=True) 1896 return stmt 1897 1898 if self._match_set(self.dialect.tokenizer.COMMANDS): 1899 return self._parse_command() 1900 1901 expression = self._parse_expression() 1902 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1903 return self._parse_query_modifiers(expression) 
1904 1905 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1906 start = self._prev 1907 temporary = self._match(TokenType.TEMPORARY) 1908 materialized = self._match_text_seq("MATERIALIZED") 1909 1910 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1911 if not kind: 1912 return self._parse_as_command(start) 1913 1914 concurrently = self._match_text_seq("CONCURRENTLY") 1915 if_exists = exists or self._parse_exists() 1916 1917 if kind == "COLUMN": 1918 this = self._parse_column() 1919 else: 1920 this = self._parse_table_parts( 1921 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1922 ) 1923 1924 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1925 1926 if self._match(TokenType.L_PAREN, advance=False): 1927 expressions = self._parse_wrapped_csv(self._parse_types) 1928 else: 1929 expressions = None 1930 1931 return self.expression( 1932 exp.Drop, 1933 exists=if_exists, 1934 this=this, 1935 expressions=expressions, 1936 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1937 temporary=temporary, 1938 materialized=materialized, 1939 cascade=self._match_text_seq("CASCADE"), 1940 constraints=self._match_text_seq("CONSTRAINTS"), 1941 purge=self._match_text_seq("PURGE"), 1942 cluster=cluster, 1943 concurrently=concurrently, 1944 ) 1945 1946 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1947 return ( 1948 self._match_text_seq("IF") 1949 and (not not_ or self._match(TokenType.NOT)) 1950 and self._match(TokenType.EXISTS) 1951 ) 1952 1953 def _parse_create(self) -> exp.Create | exp.Command: 1954 # Note: this can't be None because we've matched a statement parser 1955 start = self._prev 1956 1957 replace = ( 1958 start.token_type == TokenType.REPLACE 1959 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1960 or self._match_pair(TokenType.OR, TokenType.ALTER) 1961 ) 1962 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1963 1964 unique = self._match(TokenType.UNIQUE) 1965 1966 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1967 clustered = True 1968 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1969 "COLUMNSTORE" 1970 ): 1971 clustered = False 1972 else: 1973 clustered = None 1974 1975 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1976 self._advance() 1977 1978 properties = None 1979 create_token = self._match_set(self.CREATABLES) and self._prev 1980 1981 if not create_token: 1982 # exp.Properties.Location.POST_CREATE 1983 properties = self._parse_properties() 1984 create_token = self._match_set(self.CREATABLES) and self._prev 1985 1986 if not properties or not create_token: 1987 return self._parse_as_command(start) 1988 1989 concurrently = self._match_text_seq("CONCURRENTLY") 1990 exists = self._parse_exists(not_=True) 1991 this = None 1992 expression: t.Optional[exp.Expression] = None 1993 indexes = None 1994 no_schema_binding = None 1995 begin = None 1996 end = None 1997 clone = None 1998 1999 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2000 nonlocal properties 2001 if properties and temp_props: 2002 properties.expressions.extend(temp_props.expressions) 2003 elif temp_props: 2004 properties = temp_props 2005 2006 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2007 this = self._parse_user_defined_function(kind=create_token.token_type) 2008 2009 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2010 
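
    # A minimal usage sketch (illustrative, not part of this module): tokens produced
    # by the dialect's tokenizer are fed to parse()/parse_into() above, which dispatch
    # to the _parse_* methods below via STATEMENT_PARSERS. The SQL strings are
    # arbitrary examples.
    #
    #   from sqlglot import exp
    #   from sqlglot.dialects import Dialect
    #
    #   dialect = Dialect.get_or_raise("duckdb")
    #   parser = dialect.parser()
    #   tokens = dialect.tokenizer.tokenize("SELECT 1; SELECT 2")
    #   trees = parser.parse(tokens, sql="SELECT 1; SELECT 2")  # [exp.Select, exp.Select]
    #
    #   # Constrain the result to a specific node type instead:
    #   [select] = parser.parse_into(exp.Select, dialect.tokenizer.tokenize("SELECT 1"))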

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

                # Some dialects also support using a table as an alias instead of a SELECT.
                # Here we fall back to this as an alternative.
                if not expression and has_alias:
                    expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
self._match_texts(("MIN", "MINIMUM")), 2174 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2175 } 2176 2177 if self._match_texts(self.PROPERTY_PARSERS): 2178 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2179 try: 2180 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2181 except TypeError: 2182 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2183 2184 return None 2185 2186 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2187 return self._parse_wrapped_csv(self._parse_property) 2188 2189 def _parse_property(self) -> t.Optional[exp.Expression]: 2190 if self._match_texts(self.PROPERTY_PARSERS): 2191 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2192 2193 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2194 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2195 2196 if self._match_text_seq("COMPOUND", "SORTKEY"): 2197 return self._parse_sortkey(compound=True) 2198 2199 if self._match_text_seq("SQL", "SECURITY"): 2200 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2201 2202 index = self._index 2203 key = self._parse_column() 2204 2205 if not self._match(TokenType.EQ): 2206 self._retreat(index) 2207 return self._parse_sequence_properties() 2208 2209 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2210 if isinstance(key, exp.Column): 2211 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2212 2213 value = self._parse_bitwise() or self._parse_var(any_token=True) 2214 2215 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2216 if isinstance(value, exp.Column): 2217 value = exp.var(value.name) 2218 2219 return self.expression(exp.Property, this=key, value=value) 2220 2221 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2222 if self._match_text_seq("BY"): 2223 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2224 2225 self._match(TokenType.ALIAS) 2226 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2227 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2228 2229 return self.expression( 2230 exp.FileFormatProperty, 2231 this=( 2232 self.expression( 2233 exp.InputOutputFormat, 2234 input_format=input_format, 2235 output_format=output_format, 2236 ) 2237 if input_format or output_format 2238 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2239 ), 2240 ) 2241 2242 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2243 field = self._parse_field() 2244 if isinstance(field, exp.Identifier) and not field.quoted: 2245 field = exp.var(field) 2246 2247 return field 2248 2249 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2250 self._match(TokenType.EQ) 2251 self._match(TokenType.ALIAS) 2252 2253 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2254 2255 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2256 properties = [] 2257 while True: 2258 if before: 2259 prop = self._parse_property_before() 2260 else: 2261 prop = self._parse_property() 2262 if not prop: 2263 break 2264 for p in ensure_list(prop): 2265 properties.append(p) 2266 2267 if properties: 2268 return self.expression(exp.Properties, expressions=properties) 2269 2270 return None 2271 2272 

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
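
    # Illustrative sketch (not part of this module): the DISTRIBUTED BY clause handled
    # by _parse_distributed_property below appears in Doris/StarRocks-style DDL. The
    # SQL and dialect choice are example assumptions.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ddl = "CREATE TABLE t (a INT) DISTRIBUTED BY HASH (a) BUCKETS 10"
    #   create = sqlglot.parse_one(ddl, read="starrocks")
    #   prop = create.find(exp.DistributedByProperty)
    #   # prop.args["kind"] == "HASH"; prop.args["buckets"] holds the bucket count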

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
self._match_text_seq("DATABASE"): 2586 kind = "DATABASE" 2587 else: 2588 kind = None 2589 2590 if kind in ("DATABASE", "TABLE", "VIEW"): 2591 this = self._parse_table_parts() 2592 else: 2593 this = None 2594 2595 if self._match(TokenType.FOR): 2596 for_or_in = "FOR" 2597 elif self._match(TokenType.IN): 2598 for_or_in = "IN" 2599 else: 2600 for_or_in = None 2601 2602 if self._match_text_seq("ACCESS"): 2603 lock_type = "ACCESS" 2604 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2605 lock_type = "EXCLUSIVE" 2606 elif self._match_text_seq("SHARE"): 2607 lock_type = "SHARE" 2608 elif self._match_text_seq("READ"): 2609 lock_type = "READ" 2610 elif self._match_text_seq("WRITE"): 2611 lock_type = "WRITE" 2612 elif self._match_text_seq("CHECKSUM"): 2613 lock_type = "CHECKSUM" 2614 else: 2615 lock_type = None 2616 2617 override = self._match_text_seq("OVERRIDE") 2618 2619 return self.expression( 2620 exp.LockingProperty, 2621 this=this, 2622 kind=kind, 2623 for_or_in=for_or_in, 2624 lock_type=lock_type, 2625 override=override, 2626 ) 2627 2628 def _parse_partition_by(self) -> t.List[exp.Expression]: 2629 if self._match(TokenType.PARTITION_BY): 2630 return self._parse_csv(self._parse_assignment) 2631 return [] 2632 2633 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2634 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2635 if self._match_text_seq("MINVALUE"): 2636 return exp.var("MINVALUE") 2637 if self._match_text_seq("MAXVALUE"): 2638 return exp.var("MAXVALUE") 2639 return self._parse_bitwise() 2640 2641 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2642 expression = None 2643 from_expressions = None 2644 to_expressions = None 2645 2646 if self._match(TokenType.IN): 2647 this = self._parse_wrapped_csv(self._parse_bitwise) 2648 elif self._match(TokenType.FROM): 2649 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2650 self._match_text_seq("TO") 2651 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2652 elif self._match_text_seq("WITH", "(", "MODULUS"): 2653 this = self._parse_number() 2654 self._match_text_seq(",", "REMAINDER") 2655 expression = self._parse_number() 2656 self._match_r_paren() 2657 else: 2658 self.raise_error("Failed to parse partition bound spec.") 2659 2660 return self.expression( 2661 exp.PartitionBoundSpec, 2662 this=this, 2663 expression=expression, 2664 from_expressions=from_expressions, 2665 to_expressions=to_expressions, 2666 ) 2667 2668 # https://www.postgresql.org/docs/current/sql-createtable.html 2669 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2670 if not self._match_text_seq("OF"): 2671 self._retreat(self._index - 1) 2672 return None 2673 2674 this = self._parse_table(schema=True) 2675 2676 if self._match(TokenType.DEFAULT): 2677 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2678 elif self._match_text_seq("FOR", "VALUES"): 2679 expression = self._parse_partition_bound_spec() 2680 else: 2681 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2682 2683 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2684 2685 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2686 self._match(TokenType.EQ) 2687 return self.expression( 2688 exp.PartitionedByProperty, 2689 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2690 ) 2691 2692 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2693 if self._match_text_seq("AND", "STATISTICS"): 2694 
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )
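
    # Illustrative sketch (not part of this module): _parse_returns below builds the
    # exp.ReturnsProperty of a CREATE FUNCTION statement. The SQL and dialect are
    # example assumptions.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   fn = sqlglot.parse_one(
    #       "CREATE FUNCTION f(x INT64) RETURNS INT64 AS (x + 1)", read="bigquery"
    #   )
    #   returns = fn.find(exp.ReturnsProperty)  # this=INT64 type, is_table=False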

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3011 kwargs["fields"] = self._parse_string() 3012 if self._match_text_seq("ESCAPED", "BY"): 3013 kwargs["escaped"] = self._parse_string() 3014 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3015 kwargs["collection_items"] = self._parse_string() 3016 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3017 kwargs["map_keys"] = self._parse_string() 3018 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3019 kwargs["lines"] = self._parse_string() 3020 if self._match_text_seq("NULL", "DEFINED", "AS"): 3021 kwargs["null"] = self._parse_string() 3022 3023 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3024 3025 def _parse_load(self) -> exp.LoadData | exp.Command: 3026 if self._match_text_seq("DATA"): 3027 local = self._match_text_seq("LOCAL") 3028 self._match_text_seq("INPATH") 3029 inpath = self._parse_string() 3030 overwrite = self._match(TokenType.OVERWRITE) 3031 self._match_pair(TokenType.INTO, TokenType.TABLE) 3032 3033 return self.expression( 3034 exp.LoadData, 3035 this=self._parse_table(schema=True), 3036 local=local, 3037 overwrite=overwrite, 3038 inpath=inpath, 3039 partition=self._parse_partition(), 3040 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3041 serde=self._match_text_seq("SERDE") and self._parse_string(), 3042 ) 3043 return self._parse_as_command(self._prev) 3044 3045 def _parse_delete(self) -> exp.Delete: 3046 # This handles MySQL's "Multiple-Table Syntax" 3047 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3048 tables = None 3049 if not self._match(TokenType.FROM, advance=False): 3050 tables = self._parse_csv(self._parse_table) or None 3051 3052 returning = self._parse_returning() 3053 3054 return self.expression( 3055 exp.Delete, 3056 tables=tables, 3057 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3058 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3059 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3060 where=self._parse_where(), 3061 returning=returning or self._parse_returning(), 3062 limit=self._parse_limit(), 3063 ) 3064 3065 def _parse_update(self) -> exp.Update: 3066 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3067 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3068 returning = self._parse_returning() 3069 return self.expression( 3070 exp.Update, 3071 **{ # type: ignore 3072 "this": this, 3073 "expressions": expressions, 3074 "from": self._parse_from(joins=True), 3075 "where": self._parse_where(), 3076 "returning": returning or self._parse_returning(), 3077 "order": self._parse_order(), 3078 "limit": self._parse_limit(), 3079 }, 3080 ) 3081 3082 def _parse_use(self) -> exp.Use: 3083 return self.expression( 3084 exp.Use, 3085 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3086 this=self._parse_table(schema=False), 3087 ) 3088 3089 def _parse_uncache(self) -> exp.Uncache: 3090 if not self._match(TokenType.TABLE): 3091 self.raise_error("Expecting TABLE after UNCACHE") 3092 3093 return self.expression( 3094 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3095 ) 3096 3097 def _parse_cache(self) -> exp.Cache: 3098 lazy = self._match_text_seq("LAZY") 3099 self._match(TokenType.TABLE) 3100 table = self._parse_table(schema=True) 3101 3102 options = [] 3103 if self._match_text_seq("OPTIONS"): 3104 self._match_l_paren() 3105 k = 

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
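
    # Illustrative sketch (not part of this module): each VALUES row is parsed into an
    # exp.Tuple by _parse_value above (arbitrary example SQL).
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   q = sqlglot.parse_one("SELECT * FROM (VALUES (1, 2), (3, 4)) AS t(a, b)")
    #   values = q.find(exp.Values)
    #   # values.expressions -> [exp.Tuple(1, 2), exp.Tuple(3, 4)]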

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. join) is following
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query
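
    # Illustrative sketch (not part of this module): the FROM-first branch handled by
    # _parse_select_query below lets DuckDB-style "FROM t" parse as a full query
    # (arbitrary example SQL).
    #
    #   import sqlglot
    #
    #   q = sqlglot.parse_one("FROM t", read="duckdb")
    #   # -> exp.Select with a star projection and a FROM clause, i.e. SELECT * FROM t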

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte
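
    # Illustrative sketch (not part of this module): WITH/CTE parsing via _parse_with
    # and _parse_cte above (arbitrary example SQL).
    #
    #   import sqlglot
    #
    #   q = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #   cte = q.args["with"].expressions[0]  # exp.CTE aliased as "x"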
self._can_parse_limit_or_offset(): 3409 return None 3410 3411 any_token = self._match(TokenType.ALIAS) 3412 alias = ( 3413 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3414 or self._parse_string_as_identifier() 3415 ) 3416 3417 index = self._index 3418 if self._match(TokenType.L_PAREN): 3419 columns = self._parse_csv(self._parse_function_parameter) 3420 self._match_r_paren() if columns else self._retreat(index) 3421 else: 3422 columns = None 3423 3424 if not alias and not columns: 3425 return None 3426 3427 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3428 3429 # We bubble up comments from the Identifier to the TableAlias 3430 if isinstance(alias, exp.Identifier): 3431 table_alias.add_comments(alias.pop_comments()) 3432 3433 return table_alias 3434 3435 def _parse_subquery( 3436 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3437 ) -> t.Optional[exp.Subquery]: 3438 if not this: 3439 return None 3440 3441 return self.expression( 3442 exp.Subquery, 3443 this=this, 3444 pivots=self._parse_pivots(), 3445 alias=self._parse_table_alias() if parse_alias else None, 3446 sample=self._parse_table_sample(), 3447 ) 3448 3449 def _implicit_unnests_to_explicit(self, this: E) -> E: 3450 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3451 3452 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3453 for i, join in enumerate(this.args.get("joins") or []): 3454 table = join.this 3455 normalized_table = table.copy() 3456 normalized_table.meta["maybe_column"] = True 3457 normalized_table = _norm(normalized_table, dialect=self.dialect) 3458 3459 if isinstance(table, exp.Table) and not join.args.get("on"): 3460 if normalized_table.parts[0].name in refs: 3461 table_as_column = table.to_column() 3462 unnest = exp.Unnest(expressions=[table_as_column]) 3463 3464 # Table.to_column creates a parent Alias node that we want to convert to 3465 # a TableAlias and attach to the Unnest, so it matches the parser's output 3466 if isinstance(table.args.get("alias"), exp.TableAlias): 3467 table_as_column.replace(table_as_column.this) 3468 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3469 3470 table.replace(unnest) 3471 3472 refs.add(normalized_table.alias_or_name) 3473 3474 return this 3475 3476 def _parse_query_modifiers( 3477 self, this: t.Optional[exp.Expression] 3478 ) -> t.Optional[exp.Expression]: 3479 if isinstance(this, self.MODIFIABLES): 3480 for join in self._parse_joins(): 3481 this.append("joins", join) 3482 for lateral in iter(self._parse_lateral, None): 3483 this.append("laterals", lateral) 3484 3485 while True: 3486 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3487 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3488 key, expression = parser(self) 3489 3490 if expression: 3491 this.set(key, expression) 3492 if key == "limit": 3493 offset = expression.args.pop("offset", None) 3494 3495 if offset: 3496 offset = exp.Offset(expression=offset) 3497 this.set("offset", offset) 3498 3499 limit_by_expressions = expression.expressions 3500 expression.set("expressions", None) 3501 offset.set("expressions", limit_by_expressions) 3502 continue 3503 break 3504 3505 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3506 this = self._implicit_unnests_to_explicit(this) 3507 3508 return this 3509 3510 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3511 start = self._curr 3512 while 
self._curr: 3513 self._advance() 3514 3515 end = self._tokens[self._index - 1] 3516 return exp.Hint(expressions=[self._find_sql(start, end)]) 3517 3518 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3519 return self._parse_function_call() 3520 3521 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3522 start_index = self._index 3523 should_fallback_to_string = False 3524 3525 hints = [] 3526 try: 3527 for hint in iter( 3528 lambda: self._parse_csv( 3529 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3530 ), 3531 [], 3532 ): 3533 hints.extend(hint) 3534 except ParseError: 3535 should_fallback_to_string = True 3536 3537 if should_fallback_to_string or self._curr: 3538 self._retreat(start_index) 3539 return self._parse_hint_fallback_to_string() 3540 3541 return self.expression(exp.Hint, expressions=hints) 3542 3543 def _parse_hint(self) -> t.Optional[exp.Hint]: 3544 if self._match(TokenType.HINT) and self._prev_comments: 3545 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3546 3547 return None 3548 3549 def _parse_into(self) -> t.Optional[exp.Into]: 3550 if not self._match(TokenType.INTO): 3551 return None 3552 3553 temp = self._match(TokenType.TEMPORARY) 3554 unlogged = self._match_text_seq("UNLOGGED") 3555 self._match(TokenType.TABLE) 3556 3557 return self.expression( 3558 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3559 ) 3560 3561 def _parse_from( 3562 self, 3563 joins: bool = False, 3564 skip_from_token: bool = False, 3565 consume_pipe: bool = False, 3566 ) -> t.Optional[exp.From]: 3567 if not skip_from_token and not self._match(TokenType.FROM): 3568 return None 3569 3570 return self.expression( 3571 exp.From, 3572 comments=self._prev_comments, 3573 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3574 ) 3575 3576 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3577 return self.expression( 3578 exp.MatchRecognizeMeasure, 3579 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3580 this=self._parse_expression(), 3581 ) 3582 3583 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3584 if not self._match(TokenType.MATCH_RECOGNIZE): 3585 return None 3586 3587 self._match_l_paren() 3588 3589 partition = self._parse_partition_by() 3590 order = self._parse_order() 3591 3592 measures = ( 3593 self._parse_csv(self._parse_match_recognize_measure) 3594 if self._match_text_seq("MEASURES") 3595 else None 3596 ) 3597 3598 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3599 rows = exp.var("ONE ROW PER MATCH") 3600 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3601 text = "ALL ROWS PER MATCH" 3602 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3603 text += " SHOW EMPTY MATCHES" 3604 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3605 text += " OMIT EMPTY MATCHES" 3606 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3607 text += " WITH UNMATCHED ROWS" 3608 rows = exp.var(text) 3609 else: 3610 rows = None 3611 3612 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3613 text = "AFTER MATCH SKIP" 3614 if self._match_text_seq("PAST", "LAST", "ROW"): 3615 text += " PAST LAST ROW" 3616 elif self._match_text_seq("TO", "NEXT", "ROW"): 3617 text += " TO NEXT ROW" 3618 elif self._match_text_seq("TO", "FIRST"): 3619 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3620 elif self._match_text_seq("TO", "LAST"): 3621 text += f" TO LAST 
{self._advance_any().text}" # type: ignore 3622 after = exp.var(text) 3623 else: 3624 after = None 3625 3626 if self._match_text_seq("PATTERN"): 3627 self._match_l_paren() 3628 3629 if not self._curr: 3630 self.raise_error("Expecting )", self._curr) 3631 3632 paren = 1 3633 start = self._curr 3634 3635 while self._curr and paren > 0: 3636 if self._curr.token_type == TokenType.L_PAREN: 3637 paren += 1 3638 if self._curr.token_type == TokenType.R_PAREN: 3639 paren -= 1 3640 3641 end = self._prev 3642 self._advance() 3643 3644 if paren > 0: 3645 self.raise_error("Expecting )", self._curr) 3646 3647 pattern = exp.var(self._find_sql(start, end)) 3648 else: 3649 pattern = None 3650 3651 define = ( 3652 self._parse_csv(self._parse_name_as_expression) 3653 if self._match_text_seq("DEFINE") 3654 else None 3655 ) 3656 3657 self._match_r_paren() 3658 3659 return self.expression( 3660 exp.MatchRecognize, 3661 partition_by=partition, 3662 order=order, 3663 measures=measures, 3664 rows=rows, 3665 after=after, 3666 pattern=pattern, 3667 define=define, 3668 alias=self._parse_table_alias(), 3669 ) 3670 3671 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3672 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3673 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3674 cross_apply = False 3675 3676 if cross_apply is not None: 3677 this = self._parse_select(table=True) 3678 view = None 3679 outer = None 3680 elif self._match(TokenType.LATERAL): 3681 this = self._parse_select(table=True) 3682 view = self._match(TokenType.VIEW) 3683 outer = self._match(TokenType.OUTER) 3684 else: 3685 return None 3686 3687 if not this: 3688 this = ( 3689 self._parse_unnest() 3690 or self._parse_function() 3691 or self._parse_id_var(any_token=False) 3692 ) 3693 3694 while self._match(TokenType.DOT): 3695 this = exp.Dot( 3696 this=this, 3697 expression=self._parse_function() or self._parse_id_var(any_token=False), 3698 ) 3699 3700 ordinality: t.Optional[bool] = None 3701 3702 if view: 3703 table = self._parse_id_var(any_token=False) 3704 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3705 table_alias: t.Optional[exp.TableAlias] = self.expression( 3706 exp.TableAlias, this=table, columns=columns 3707 ) 3708 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3709 # We move the alias from the lateral's child node to the lateral itself 3710 table_alias = this.args["alias"].pop() 3711 else: 3712 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3713 table_alias = self._parse_table_alias() 3714 3715 return self.expression( 3716 exp.Lateral, 3717 this=this, 3718 view=view, 3719 outer=outer, 3720 alias=table_alias, 3721 cross_apply=cross_apply, 3722 ordinality=ordinality, 3723 ) 3724 3725 def _parse_join_parts( 3726 self, 3727 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3728 return ( 3729 self._match_set(self.JOIN_METHODS) and self._prev, 3730 self._match_set(self.JOIN_SIDES) and self._prev, 3731 self._match_set(self.JOIN_KINDS) and self._prev, 3732 ) 3733 3734 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3735 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3736 this = self._parse_column() 3737 if isinstance(this, exp.Column): 3738 return this.this 3739 return this 3740 3741 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3742 3743 def _parse_join( 3744 self, skip_join_token: bool = False, parse_bracket: bool = False 3745 ) -> 
t.Optional[exp.Join]: 3746 if self._match(TokenType.COMMA): 3747 table = self._try_parse(self._parse_table) 3748 cross_join = self.expression(exp.Join, this=table) if table else None 3749 3750 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3751 cross_join.set("kind", "CROSS") 3752 3753 return cross_join 3754 3755 index = self._index 3756 method, side, kind = self._parse_join_parts() 3757 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3758 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3759 3760 if not skip_join_token and not join: 3761 self._retreat(index) 3762 kind = None 3763 method = None 3764 side = None 3765 3766 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3767 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3768 3769 if not skip_join_token and not join and not outer_apply and not cross_apply: 3770 return None 3771 3772 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3773 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3774 kwargs["expressions"] = self._parse_csv( 3775 lambda: self._parse_table(parse_bracket=parse_bracket) 3776 ) 3777 3778 if method: 3779 kwargs["method"] = method.text 3780 if side: 3781 kwargs["side"] = side.text 3782 if kind: 3783 kwargs["kind"] = kind.text 3784 if hint: 3785 kwargs["hint"] = hint 3786 3787 if self._match(TokenType.MATCH_CONDITION): 3788 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3789 3790 if self._match(TokenType.ON): 3791 kwargs["on"] = self._parse_assignment() 3792 elif self._match(TokenType.USING): 3793 kwargs["using"] = self._parse_using_identifiers() 3794 elif ( 3795 not (outer_apply or cross_apply) 3796 and not isinstance(kwargs["this"], exp.Unnest) 3797 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3798 ): 3799 index = self._index 3800 joins: t.Optional[list] = list(self._parse_joins()) 3801 3802 if joins and self._match(TokenType.ON): 3803 kwargs["on"] = self._parse_assignment() 3804 elif joins and self._match(TokenType.USING): 3805 kwargs["using"] = self._parse_using_identifiers() 3806 else: 3807 joins = None 3808 self._retreat(index) 3809 3810 kwargs["this"].set("joins", joins if joins else None) 3811 3812 kwargs["pivots"] = self._parse_pivots() 3813 3814 comments = [c for token in (method, side, kind) if token for c in token.comments] 3815 return self.expression(exp.Join, comments=comments, **kwargs) 3816 3817 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3818 this = self._parse_assignment() 3819 3820 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3821 return this 3822 3823 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3824 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3825 3826 return this 3827 3828 def _parse_index_params(self) -> exp.IndexParameters: 3829 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3830 3831 if self._match(TokenType.L_PAREN, advance=False): 3832 columns = self._parse_wrapped_csv(self._parse_with_operator) 3833 else: 3834 columns = None 3835 3836 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3837 partition_by = self._parse_partition_by() 3838 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3839 tablespace = ( 3840 self._parse_var(any_token=True) 3841 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3842 else None 3843 ) 3844 where = self._parse_where() 3845 3846 on = self._parse_field() if self._match(TokenType.ON) else None 3847 3848 return self.expression( 3849 exp.IndexParameters, 3850 using=using, 3851 columns=columns, 3852 include=include, 3853 partition_by=partition_by, 3854 where=where, 3855 with_storage=with_storage, 3856 tablespace=tablespace, 3857 on=on, 3858 ) 3859 3860 def _parse_index( 3861 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3862 ) -> t.Optional[exp.Index]: 3863 if index or anonymous: 3864 unique = None 3865 primary = None 3866 amp = None 3867 3868 self._match(TokenType.ON) 3869 self._match(TokenType.TABLE) # hive 3870 table = self._parse_table_parts(schema=True) 3871 else: 3872 unique = self._match(TokenType.UNIQUE) 3873 primary = self._match_text_seq("PRIMARY") 3874 amp = self._match_text_seq("AMP") 3875 3876 if not self._match(TokenType.INDEX): 3877 return None 3878 3879 index = self._parse_id_var() 3880 table = None 3881 3882 params = self._parse_index_params() 3883 3884 return self.expression( 3885 exp.Index, 3886 this=index, 3887 table=table, 3888 unique=unique, 3889 primary=primary, 3890 amp=amp, 3891 params=params, 3892 ) 3893 3894 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3895 hints: t.List[exp.Expression] = [] 3896 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3897 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3898 hints.append( 3899 self.expression( 3900 exp.WithTableHint, 3901 expressions=self._parse_csv( 3902 lambda: self._parse_function() or self._parse_var(any_token=True) 3903 ), 3904 ) 3905 ) 3906 self._match_r_paren() 3907 else: 3908 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3909 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3910 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3911 3912 self._match_set((TokenType.INDEX, TokenType.KEY)) 3913 if self._match(TokenType.FOR): 3914 hint.set("target", self._advance_any() and self._prev.text.upper()) 3915 3916 hint.set("expressions", self._parse_wrapped_id_vars()) 3917 hints.append(hint) 3918 3919 return hints or None 3920 3921 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3922 return ( 3923 (not schema and self._parse_function(optional_parens=False)) 3924 or self._parse_id_var(any_token=False) 3925 or self._parse_string_as_identifier() 3926 or self._parse_placeholder() 3927 ) 3928 3929 def _parse_table_parts( 3930 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3931 ) -> exp.Table: 3932 catalog = None 3933 db = None 3934 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3935 3936 while self._match(TokenType.DOT): 3937 if catalog: 3938 # This allows nesting the table in arbitrarily many dot expressions if needed 3939 table = self.expression( 3940 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3941 ) 3942 else: 3943 catalog = db 3944 db = table 3945 # "" used for tsql FROM a..b case 3946 table = self._parse_table_part(schema=schema) or "" 3947 3948 if ( 3949 wildcard 3950 and self._is_connected() 3951 and (isinstance(table, exp.Identifier) or not table) 3952 and self._match(TokenType.STAR) 3953 ): 3954 if isinstance(table, exp.Identifier): 3955 table.args["this"] += "*" 3956 else: 3957 table = exp.Identifier(this="*") 3958 3959 # We bubble up comments from the Identifier to the Table 
3960 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3961 3962 if is_db_reference: 3963 catalog = db 3964 db = table 3965 table = None 3966 3967 if not table and not is_db_reference: 3968 self.raise_error(f"Expected table name but got {self._curr}") 3969 if not db and is_db_reference: 3970 self.raise_error(f"Expected database name but got {self._curr}") 3971 3972 table = self.expression( 3973 exp.Table, 3974 comments=comments, 3975 this=table, 3976 db=db, 3977 catalog=catalog, 3978 ) 3979 3980 changes = self._parse_changes() 3981 if changes: 3982 table.set("changes", changes) 3983 3984 at_before = self._parse_historical_data() 3985 if at_before: 3986 table.set("when", at_before) 3987 3988 pivots = self._parse_pivots() 3989 if pivots: 3990 table.set("pivots", pivots) 3991 3992 return table 3993 3994 def _parse_table( 3995 self, 3996 schema: bool = False, 3997 joins: bool = False, 3998 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3999 parse_bracket: bool = False, 4000 is_db_reference: bool = False, 4001 parse_partition: bool = False, 4002 consume_pipe: bool = False, 4003 ) -> t.Optional[exp.Expression]: 4004 lateral = self._parse_lateral() 4005 if lateral: 4006 return lateral 4007 4008 unnest = self._parse_unnest() 4009 if unnest: 4010 return unnest 4011 4012 values = self._parse_derived_table_values() 4013 if values: 4014 return values 4015 4016 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4017 if subquery: 4018 if not subquery.args.get("pivots"): 4019 subquery.set("pivots", self._parse_pivots()) 4020 return subquery 4021 4022 bracket = parse_bracket and self._parse_bracket(None) 4023 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4024 4025 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4026 self._parse_table 4027 ) 4028 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4029 4030 only = self._match(TokenType.ONLY) 4031 4032 this = t.cast( 4033 exp.Expression, 4034 bracket 4035 or rows_from 4036 or self._parse_bracket( 4037 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4038 ), 4039 ) 4040 4041 if only: 4042 this.set("only", only) 4043 4044 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4045 self._match_text_seq("*") 4046 4047 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4048 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4049 this.set("partition", self._parse_partition()) 4050 4051 if schema: 4052 return self._parse_schema(this=this) 4053 4054 version = self._parse_version() 4055 4056 if version: 4057 this.set("version", version) 4058 4059 if self.dialect.ALIAS_POST_TABLESAMPLE: 4060 this.set("sample", self._parse_table_sample()) 4061 4062 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4063 if alias: 4064 this.set("alias", alias) 4065 4066 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4067 return self.expression( 4068 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4069 ) 4070 4071 this.set("hints", self._parse_table_hints()) 4072 4073 if not this.args.get("pivots"): 4074 this.set("pivots", self._parse_pivots()) 4075 4076 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4077 this.set("sample", self._parse_table_sample()) 4078 4079 if joins: 4080 for join in self._parse_joins(): 4081 this.append("joins", join) 4082 4083 if 
self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4084 this.set("ordinality", True) 4085 this.set("alias", self._parse_table_alias()) 4086 4087 return this 4088 4089 def _parse_version(self) -> t.Optional[exp.Version]: 4090 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4091 this = "TIMESTAMP" 4092 elif self._match(TokenType.VERSION_SNAPSHOT): 4093 this = "VERSION" 4094 else: 4095 return None 4096 4097 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4098 kind = self._prev.text.upper() 4099 start = self._parse_bitwise() 4100 self._match_texts(("TO", "AND")) 4101 end = self._parse_bitwise() 4102 expression: t.Optional[exp.Expression] = self.expression( 4103 exp.Tuple, expressions=[start, end] 4104 ) 4105 elif self._match_text_seq("CONTAINED", "IN"): 4106 kind = "CONTAINED IN" 4107 expression = self.expression( 4108 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4109 ) 4110 elif self._match(TokenType.ALL): 4111 kind = "ALL" 4112 expression = None 4113 else: 4114 self._match_text_seq("AS", "OF") 4115 kind = "AS OF" 4116 expression = self._parse_type() 4117 4118 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4119 4120 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4121 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4122 index = self._index 4123 historical_data = None 4124 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4125 this = self._prev.text.upper() 4126 kind = ( 4127 self._match(TokenType.L_PAREN) 4128 and self._match_texts(self.HISTORICAL_DATA_KIND) 4129 and self._prev.text.upper() 4130 ) 4131 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4132 4133 if expression: 4134 self._match_r_paren() 4135 historical_data = self.expression( 4136 exp.HistoricalData, this=this, kind=kind, expression=expression 4137 ) 4138 else: 4139 self._retreat(index) 4140 4141 return historical_data 4142 4143 def _parse_changes(self) -> t.Optional[exp.Changes]: 4144 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4145 return None 4146 4147 information = self._parse_var(any_token=True) 4148 self._match_r_paren() 4149 4150 return self.expression( 4151 exp.Changes, 4152 information=information, 4153 at_before=self._parse_historical_data(), 4154 end=self._parse_historical_data(), 4155 ) 4156 4157 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4158 if not self._match(TokenType.UNNEST): 4159 return None 4160 4161 expressions = self._parse_wrapped_csv(self._parse_equality) 4162 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4163 4164 alias = self._parse_table_alias() if with_alias else None 4165 4166 if alias: 4167 if self.dialect.UNNEST_COLUMN_ONLY: 4168 if alias.args.get("columns"): 4169 self.raise_error("Unexpected extra column alias in unnest.") 4170 4171 alias.set("columns", [alias.this]) 4172 alias.set("this", None) 4173 4174 columns = alias.args.get("columns") or [] 4175 if offset and len(expressions) < len(columns): 4176 offset = columns.pop() 4177 4178 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4179 self._match(TokenType.ALIAS) 4180 offset = self._parse_id_var( 4181 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4182 ) or exp.to_identifier("offset") 4183 4184 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4185 4186 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4187 is_derived = self._match_pair(TokenType.L_PAREN, 
TokenType.VALUES)
4188        if not is_derived and not (
4189            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
4190            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
4191        ):
4192            return None
4193
4194        expressions = self._parse_csv(self._parse_value)
4195        alias = self._parse_table_alias()
4196
4197        if is_derived:
4198            self._match_r_paren()
4199
4200        return self.expression(
4201            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
4202        )
4203
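    # A minimal usage sketch of _parse_derived_table_values, assuming only the
    # public sqlglot.parse_one API: a parenthesized VALUES list used as a table
    # parses into an exp.Values node that carries its own alias.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS t(a)").find(exp.Values).alias
    #     't'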
4204    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
4205        if not self._match(TokenType.TABLE_SAMPLE) and not (
4206            as_modifier and self._match_text_seq("USING", "SAMPLE")
4207        ):
4208            return None
4209
4210        bucket_numerator = None
4211        bucket_denominator = None
4212        bucket_field = None
4213        percent = None
4214        size = None
4215        seed = None
4216
4217        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
4218        matched_l_paren = self._match(TokenType.L_PAREN)
4219
4220        if self.TABLESAMPLE_CSV:
4221            num = None
4222            expressions = self._parse_csv(self._parse_primary)
4223        else:
4224            expressions = None
4225            num = (
4226                self._parse_factor()
4227                if self._match(TokenType.NUMBER, advance=False)
4228                else self._parse_primary() or self._parse_placeholder()
4229            )
4230
4231        if self._match_text_seq("BUCKET"):
4232            bucket_numerator = self._parse_number()
4233            self._match_text_seq("OUT", "OF")
4234            bucket_denominator = self._parse_number()
4235            self._match(TokenType.ON)
4236            bucket_field = self._parse_field()
4237        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
4238            percent = num
4239        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
4240            size = num
4241        else:
4242            percent = num
4243
4244        if matched_l_paren:
4245            self._match_r_paren()
4246
4247        if self._match(TokenType.L_PAREN):
4248            method = self._parse_var(upper=True)
4249            seed = self._match(TokenType.COMMA) and self._parse_number()
4250            self._match_r_paren()
4251        elif self._match_texts(("SEED", "REPEATABLE")):
4252            seed = self._parse_wrapped(self._parse_number)
4253
4254        if not method and self.DEFAULT_SAMPLING_METHOD:
4255            method = exp.var(self.DEFAULT_SAMPLING_METHOD)
4256
4257        return self.expression(
4258            exp.TableSample,
4259            expressions=expressions,
4260            method=method,
4261            bucket_numerator=bucket_numerator,
4262            bucket_denominator=bucket_denominator,
4263            bucket_field=bucket_field,
4264            percent=percent,
4265            size=size,
4266            seed=seed,
4267        )
4268
4269    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
4270        return list(iter(self._parse_pivot, None)) or None
4271
4272    def _parse_joins(self) -> t.Iterator[exp.Join]:
4273        return iter(self._parse_join, None)
4274
4275    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
4276        if not self._match(TokenType.INTO):
4277            return None
4278
4279        return self.expression(
4280            exp.UnpivotColumns,
4281            this=self._match_text_seq("NAME") and self._parse_column(),
4282            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
4283        )
4284
4285    # https://duckdb.org/docs/sql/statements/pivot
4286    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
4287        def _parse_on() -> t.Optional[exp.Expression]:
4288            this = self._parse_bitwise()
4289
4290            if self._match(TokenType.IN):
4291                # PIVOT ... ON col IN (row_val1, row_val2)
4292                return self._parse_in(this)
4293            if self._match(TokenType.ALIAS, advance=False):
4294                # UNPIVOT ... ON (col1, col2, col3) AS row_val
4295                return self._parse_alias(this)
4296
4297            return this
4298
4299        this = self._parse_table()
4300        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
4301        into = self._parse_unpivot_columns()
4302        using = self._match(TokenType.USING) and self._parse_csv(
4303            lambda: self._parse_alias(self._parse_function())
4304        )
4305        group = self._parse_group()
4306
4307        return self.expression(
4308            exp.Pivot,
4309            this=this,
4310            expressions=expressions,
4311            using=using,
4312            group=group,
4313            unpivot=is_unpivot,
4314            into=into,
4315        )
4316
4317    def _parse_pivot_in(self) -> exp.In:
4318        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
4319            this = self._parse_select_or_expression()
4320
4321            self._match(TokenType.ALIAS)
4322            alias = self._parse_bitwise()
4323            if alias:
4324                if isinstance(alias, exp.Column) and not alias.db:
4325                    alias = alias.this
4326                return self.expression(exp.PivotAlias, this=this, alias=alias)
4327
4328            return this
4329
4330        value = self._parse_column()
4331
4332        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
4333            self.raise_error("Expecting IN (")
4334
4335        if self._match(TokenType.ANY):
4336            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
4337        else:
4338            exprs = self._parse_csv(_parse_aliased_expression)
4339
4340        self._match_r_paren()
4341        return self.expression(exp.In, this=value, expressions=exprs)
4342
4343    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
4344        index = self._index
4345        include_nulls = None
4346
4347        if self._match(TokenType.PIVOT):
4348            unpivot = False
4349        elif self._match(TokenType.UNPIVOT):
4350            unpivot = True
4351
4352            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
4353            if self._match_text_seq("INCLUDE", "NULLS"):
4354                include_nulls = True
4355            elif self._match_text_seq("EXCLUDE", "NULLS"):
4356                include_nulls = False
4357        else:
4358            return None
4359
4360        expressions = []
4361
4362        if not self._match(TokenType.L_PAREN):
4363            self._retreat(index)
4364            return None
4365
4366        if unpivot:
4367            expressions = self._parse_csv(self._parse_column)
4368        else:
4369            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))
4370
4371        if not expressions:
4372            self.raise_error("Failed to parse PIVOT's aggregation list")
4373
4374        if not self._match(TokenType.FOR):
4375            self.raise_error("Expecting FOR")
4376
4377        fields = []
4378        while True:
4379            field = self._try_parse(self._parse_pivot_in)
4380            if not field:
4381                break
4382            fields.append(field)
4383
4384        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
4385            self._parse_bitwise
4386        )
4387
4388        group = self._parse_group()
4389
4390        self._match_r_paren()
4391
4392        pivot = self.expression(
4393            exp.Pivot,
4394            expressions=expressions,
4395            fields=fields,
4396            unpivot=unpivot,
4397            include_nulls=include_nulls,
4398            default_on_null=default_on_null,
4399            group=group,
4400        )
4401
4402        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
4403            pivot.set("alias", self._parse_table_alias())
4404
4405        if not unpivot:
4406            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))
4407
4408            columns: t.List[exp.Expression] = []
4409            all_fields = []
4410            for pivot_field in pivot.fields:
4411                pivot_field_expressions = pivot_field.expressions
4412
4413                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
4414                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
4415                    continue
4416
4417                all_fields.append(
4418                    [
4419                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
4420                        for fld in pivot_field_expressions
4421                    ]
4422                )
4423
4424            if all_fields:
4425                if names:
4426                    all_fields.append(names)
4427
4428                # Generate all possible combinations of the pivot columns
4429                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
4430                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
4431                for fld_parts_tuple in itertools.product(*all_fields):
4432                    fld_parts = list(fld_parts_tuple)
4433
4434                    if names and self.PREFIXED_PIVOT_COLUMNS:
4435                        # Move the "name" to the front of the list
4436                        fld_parts.insert(0, fld_parts.pop(-1))
4437
4438                    columns.append(exp.to_identifier("_".join(fld_parts)))
4439
4440                pivot.set("columns", columns)
4441
4442        return pivot
4443
4444    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
4445        return [agg.alias for agg in aggregations if agg.alias]
4446
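    # A condensed, standalone sketch of the pivot column naming performed in
    # _parse_pivot above (the field values are hypothetical): two FOR fields plus
    # one aggregation alias yield the cartesian product of their parts, joined
    # with underscores.
    #
    #     >>> import itertools
    #     >>> all_fields = [["2000", "2010"], ["NL", "US"], ["total"]]
    #     >>> ["_".join(parts) for parts in itertools.product(*all_fields)]
    #     ['2000_NL_total', '2000_US_total', '2010_NL_total', '2010_US_total']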
4447    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
4448        if not skip_where_token and not self._match(TokenType.PREWHERE):
4449            return None
4450
4451        return self.expression(
4452            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
4453        )
4454
4455    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
4456        if not skip_where_token and not self._match(TokenType.WHERE):
4457            return None
4458
4459        return self.expression(
4460            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
4461        )
4462
4463    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
4464        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
4465            return None
4466
4467        elements: t.Dict[str, t.Any] = defaultdict(list)
4468
4469        if self._match(TokenType.ALL):
4470            elements["all"] = True
4471        elif self._match(TokenType.DISTINCT):
4472            elements["all"] = False
4473
4474        while True:
4475            index = self._index
4476
4477            elements["expressions"].extend(
4478                self._parse_csv(
4479                    lambda: None
4480                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
4481                    else self._parse_assignment()
4482                )
4483            )
4484
4485            before_with_index = self._index
4486            with_prefix = self._match(TokenType.WITH)
4487
4488            if self._match(TokenType.ROLLUP):
4489                elements["rollup"].append(
4490                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
4491                )
4492            elif self._match(TokenType.CUBE):
4493                elements["cube"].append(
4494                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
4495                )
4496            elif self._match(TokenType.GROUPING_SETS):
4497                elements["grouping_sets"].append(
4498                    self.expression(
4499                        exp.GroupingSets,
4500                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
4501                    )
4502                )
4503            elif self._match_text_seq("TOTALS"):
4504                elements["totals"] = True  # type: ignore
4505
4506            if before_with_index <= self._index <= before_with_index + 1:
4507                self._retreat(before_with_index)
4508                break
4509
4510            if index == self._index:
4511                break
4512
4513        return self.expression(exp.Group, **elements)  # type: ignore
4514
4515    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
4516        return self.expression(
4517            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
4518        )
4519
4520    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
4521        if
self._match(TokenType.L_PAREN): 4522 grouping_set = self._parse_csv(self._parse_column) 4523 self._match_r_paren() 4524 return self.expression(exp.Tuple, expressions=grouping_set) 4525 4526 return self._parse_column() 4527 4528 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4529 if not skip_having_token and not self._match(TokenType.HAVING): 4530 return None 4531 return self.expression(exp.Having, this=self._parse_assignment()) 4532 4533 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4534 if not self._match(TokenType.QUALIFY): 4535 return None 4536 return self.expression(exp.Qualify, this=self._parse_assignment()) 4537 4538 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4539 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4540 exp.Prior, this=self._parse_bitwise() 4541 ) 4542 connect = self._parse_assignment() 4543 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4544 return connect 4545 4546 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4547 if skip_start_token: 4548 start = None 4549 elif self._match(TokenType.START_WITH): 4550 start = self._parse_assignment() 4551 else: 4552 return None 4553 4554 self._match(TokenType.CONNECT_BY) 4555 nocycle = self._match_text_seq("NOCYCLE") 4556 connect = self._parse_connect_with_prior() 4557 4558 if not start and self._match(TokenType.START_WITH): 4559 start = self._parse_assignment() 4560 4561 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4562 4563 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4564 this = self._parse_id_var(any_token=True) 4565 if self._match(TokenType.ALIAS): 4566 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4567 return this 4568 4569 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4570 if self._match_text_seq("INTERPOLATE"): 4571 return self._parse_wrapped_csv(self._parse_name_as_expression) 4572 return None 4573 4574 def _parse_order( 4575 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4576 ) -> t.Optional[exp.Expression]: 4577 siblings = None 4578 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4579 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4580 return this 4581 4582 siblings = True 4583 4584 return self.expression( 4585 exp.Order, 4586 this=this, 4587 expressions=self._parse_csv(self._parse_ordered), 4588 siblings=siblings, 4589 ) 4590 4591 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4592 if not self._match(token): 4593 return None 4594 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4595 4596 def _parse_ordered( 4597 self, parse_method: t.Optional[t.Callable] = None 4598 ) -> t.Optional[exp.Ordered]: 4599 this = parse_method() if parse_method else self._parse_assignment() 4600 if not this: 4601 return None 4602 4603 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4604 this = exp.var("ALL") 4605 4606 asc = self._match(TokenType.ASC) 4607 desc = self._match(TokenType.DESC) or (asc and False) 4608 4609 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4610 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4611 4612 nulls_first = is_nulls_first or False 4613 explicitly_null_ordered = is_nulls_first or is_nulls_last 4614 4615 if ( 4616 not explicitly_null_ordered 4617 and ( 4618 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4619 or 
(desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4620 ) 4621 and self.dialect.NULL_ORDERING != "nulls_are_last" 4622 ): 4623 nulls_first = True 4624 4625 if self._match_text_seq("WITH", "FILL"): 4626 with_fill = self.expression( 4627 exp.WithFill, 4628 **{ # type: ignore 4629 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4630 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4631 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4632 "interpolate": self._parse_interpolate(), 4633 }, 4634 ) 4635 else: 4636 with_fill = None 4637 4638 return self.expression( 4639 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4640 ) 4641 4642 def _parse_limit_options(self) -> exp.LimitOptions: 4643 percent = self._match(TokenType.PERCENT) 4644 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4645 self._match_text_seq("ONLY") 4646 with_ties = self._match_text_seq("WITH", "TIES") 4647 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4648 4649 def _parse_limit( 4650 self, 4651 this: t.Optional[exp.Expression] = None, 4652 top: bool = False, 4653 skip_limit_token: bool = False, 4654 ) -> t.Optional[exp.Expression]: 4655 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4656 comments = self._prev_comments 4657 if top: 4658 limit_paren = self._match(TokenType.L_PAREN) 4659 expression = self._parse_term() if limit_paren else self._parse_number() 4660 4661 if limit_paren: 4662 self._match_r_paren() 4663 4664 limit_options = self._parse_limit_options() 4665 else: 4666 limit_options = None 4667 expression = self._parse_term() 4668 4669 if self._match(TokenType.COMMA): 4670 offset = expression 4671 expression = self._parse_term() 4672 else: 4673 offset = None 4674 4675 limit_exp = self.expression( 4676 exp.Limit, 4677 this=this, 4678 expression=expression, 4679 offset=offset, 4680 comments=comments, 4681 limit_options=limit_options, 4682 expressions=self._parse_limit_by(), 4683 ) 4684 4685 return limit_exp 4686 4687 if self._match(TokenType.FETCH): 4688 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4689 direction = self._prev.text.upper() if direction else "FIRST" 4690 4691 count = self._parse_field(tokens=self.FETCH_TOKENS) 4692 4693 return self.expression( 4694 exp.Fetch, 4695 direction=direction, 4696 count=count, 4697 limit_options=self._parse_limit_options(), 4698 ) 4699 4700 return this 4701 4702 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4703 if not self._match(TokenType.OFFSET): 4704 return this 4705 4706 count = self._parse_term() 4707 self._match_set((TokenType.ROW, TokenType.ROWS)) 4708 4709 return self.expression( 4710 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4711 ) 4712 4713 def _can_parse_limit_or_offset(self) -> bool: 4714 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4715 return False 4716 4717 index = self._index 4718 result = bool( 4719 self._try_parse(self._parse_limit, retreat=True) 4720 or self._try_parse(self._parse_offset, retreat=True) 4721 ) 4722 self._retreat(index) 4723 return result 4724 4725 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4726 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4727 4728 def _parse_locks(self) -> t.List[exp.Lock]: 4729 locks = [] 4730 while True: 4731 if self._match_text_seq("FOR", "UPDATE"): 4732 update = True 4733 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
4734                "LOCK", "IN", "SHARE", "MODE"
4735            ):
4736                update = False
4737            else:
4738                break
4739
4740            expressions = None
4741            if self._match_text_seq("OF"):
4742                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
4743
4744            wait: t.Optional[bool | exp.Expression] = None
4745            if self._match_text_seq("NOWAIT"):
4746                wait = True
4747            elif self._match_text_seq("WAIT"):
4748                wait = self._parse_primary()
4749            elif self._match_text_seq("SKIP", "LOCKED"):
4750                wait = False
4751
4752            locks.append(
4753                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
4754            )
4755
4756        return locks
4757
4758    def parse_set_operation(
4759        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
4760    ) -> t.Optional[exp.Expression]:
4761        start = self._index
4762        _, side_token, kind_token = self._parse_join_parts()
4763
4764        side = side_token.text if side_token else None
4765        kind = kind_token.text if kind_token else None
4766
4767        if not self._match_set(self.SET_OPERATIONS):
4768            self._retreat(start)
4769            return None
4770
4771        token_type = self._prev.token_type
4772
4773        if token_type == TokenType.UNION:
4774            operation: t.Type[exp.SetOperation] = exp.Union
4775        elif token_type == TokenType.EXCEPT:
4776            operation = exp.Except
4777        else:
4778            operation = exp.Intersect
4779
4780        comments = self._prev.comments
4781
4782        if self._match(TokenType.DISTINCT):
4783            distinct: t.Optional[bool] = True
4784        elif self._match(TokenType.ALL):
4785            distinct = False
4786        else:
4787            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
4788            if distinct is None:
4789                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")
4790
4791        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
4792            "STRICT", "CORRESPONDING"
4793        )
4794        if self._match_text_seq("CORRESPONDING"):
4795            by_name = True
4796            if not side and not kind:
4797                kind = "INNER"
4798
4799        on_column_list = None
4800        if by_name and self._match_texts(("ON", "BY")):
4801            on_column_list = self._parse_wrapped_csv(self._parse_column)
4802
4803        expression = self._parse_select(
4804            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
4805        )
4806
4807        return self.expression(
4808            operation,
4809            comments=comments,
4810            this=this,
4811            distinct=distinct,
4812            by_name=by_name,
4813            expression=expression,
4814            side=side,
4815            kind=kind,
4816            on=on_column_list,
4817        )
4818
4819    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
4820        while this:
4821            setop = self.parse_set_operation(this)
4822            if not setop:
4823                break
4824            this = setop
4825
4826        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
4827            expression = this.expression
4828
4829            if expression:
4830                for arg in self.SET_OP_MODIFIERS:
4831                    expr = expression.args.get(arg)
4832                    if expr:
4833                        this.set(arg, expr.pop())
4834
4835        return this
4836
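    # A minimal usage sketch of parse_set_operation, assuming only the public
    # sqlglot.parse_one API: UNION ALL parses into an exp.Union whose `distinct`
    # arg is False.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
    #     >>> isinstance(union, exp.Union), union.args["distinct"]
    #     (True, False)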
4837    def _parse_expression(self) -> t.Optional[exp.Expression]:
4838        return self._parse_alias(self._parse_assignment())
4839
4840    def _parse_assignment(self) -> t.Optional[exp.Expression]:
4841        this = self._parse_disjunction()
4842        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
4843            # This allows us to parse <non-identifier token> := <expr>
4844            this = exp.column(
4845                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
4846            )
4847
4848        while self._match_set(self.ASSIGNMENT):
4849            if isinstance(this, exp.Column) and len(this.parts) == 1:
4850                this = this.this
4851
4852            this = self.expression(
4853                self.ASSIGNMENT[self._prev.token_type],
4854                this=this,
4855                comments=self._prev_comments,
4856                expression=self._parse_assignment(),
4857            )
4858
4859        return this
4860
4861    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
4862        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)
4863
4864    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
4865        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
4866
4867    def _parse_equality(self) -> t.Optional[exp.Expression]:
4868        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
4869
4870    def _parse_comparison(self) -> t.Optional[exp.Expression]:
4871        return self._parse_tokens(self._parse_range, self.COMPARISON)
4872
4873    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
4874        this = this or self._parse_bitwise()
4875        negate = self._match(TokenType.NOT)
4876
4877        if self._match_set(self.RANGE_PARSERS):
4878            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
4879            if not expression:
4880                return this
4881
4882            this = expression
4883        elif self._match(TokenType.ISNULL):
4884            this = self.expression(exp.Is, this=this, expression=exp.Null())
4885
4886        # Postgres supports ISNULL and NOTNULL for conditions.
4887        # https://blog.andreiavram.ro/postgresql-null-composite-type/
4888        if self._match(TokenType.NOTNULL):
4889            this = self.expression(exp.Is, this=this, expression=exp.Null())
4890            this = self.expression(exp.Not, this=this)
4891
4892        if negate:
4893            this = self._negate_range(this)
4894
4895        if self._match(TokenType.IS):
4896            this = self._parse_is(this)
4897
4898        return this
4899
4900    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
4901        if not this:
4902            return this
4903
4904        return self.expression(exp.Not, this=this)
4905
4906    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
4907        index = self._index - 1
4908        negate = self._match(TokenType.NOT)
4909
4910        if self._match_text_seq("DISTINCT", "FROM"):
4911            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
4912            return self.expression(klass, this=this, expression=self._parse_bitwise())
4913
4914        if self._match(TokenType.JSON):
4915            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()
4916
4917            if self._match_text_seq("WITH"):
4918                _with = True
4919            elif self._match_text_seq("WITHOUT"):
4920                _with = False
4921            else:
4922                _with = None
4923
4924            unique = self._match(TokenType.UNIQUE)
4925            self._match_text_seq("KEYS")
4926            expression: t.Optional[exp.Expression] = self.expression(
4927                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
4928            )
4929        else:
4930            expression = self._parse_primary() or self._parse_null()
4931            if not expression:
4932                self._retreat(index)
4933                return None
4934
4935        this = self.expression(exp.Is, this=this, expression=expression)
4936        return self.expression(exp.Not, this=this) if negate else this
4937
4938    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
4939        unnest = self._parse_unnest(with_alias=False)
4940        if unnest:
4941            this = self.expression(exp.In, this=this, unnest=unnest)
4942        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
4943            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
4944            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
4945
4946            if len(expressions) ==
1 and isinstance(expressions[0], exp.Query): 4947 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4948 else: 4949 this = self.expression(exp.In, this=this, expressions=expressions) 4950 4951 if matched_l_paren: 4952 self._match_r_paren(this) 4953 elif not self._match(TokenType.R_BRACKET, expression=this): 4954 self.raise_error("Expecting ]") 4955 else: 4956 this = self.expression(exp.In, this=this, field=self._parse_column()) 4957 4958 return this 4959 4960 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4961 low = self._parse_bitwise() 4962 self._match(TokenType.AND) 4963 high = self._parse_bitwise() 4964 return self.expression(exp.Between, this=this, low=low, high=high) 4965 4966 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4967 if not self._match(TokenType.ESCAPE): 4968 return this 4969 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4970 4971 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4972 index = self._index 4973 4974 if not self._match(TokenType.INTERVAL) and match_interval: 4975 return None 4976 4977 if self._match(TokenType.STRING, advance=False): 4978 this = self._parse_primary() 4979 else: 4980 this = self._parse_term() 4981 4982 if not this or ( 4983 isinstance(this, exp.Column) 4984 and not this.table 4985 and not this.this.quoted 4986 and this.name.upper() == "IS" 4987 ): 4988 self._retreat(index) 4989 return None 4990 4991 unit = self._parse_function() or ( 4992 not self._match(TokenType.ALIAS, advance=False) 4993 and self._parse_var(any_token=True, upper=True) 4994 ) 4995 4996 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4997 # each INTERVAL expression into this canonical form so it's easy to transpile 4998 if this and this.is_number: 4999 this = exp.Literal.string(this.to_py()) 5000 elif this and this.is_string: 5001 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5002 if parts and unit: 5003 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5004 unit = None 5005 self._retreat(self._index - 1) 5006 5007 if len(parts) == 1: 5008 this = exp.Literal.string(parts[0][0]) 5009 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5010 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5011 unit = self.expression( 5012 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5013 ) 5014 5015 interval = self.expression(exp.Interval, this=this, unit=unit) 5016 5017 index = self._index 5018 self._match(TokenType.PLUS) 5019 5020 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals
5021        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
5022            return self.expression(
5023                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
5024            )
5025
5026        self._retreat(index)
5027        return interval
5028
5029    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
5030        this = self._parse_term()
5031
5032        while True:
5033            if self._match_set(self.BITWISE):
5034                this = self.expression(
5035                    self.BITWISE[self._prev.token_type],
5036                    this=this,
5037                    expression=self._parse_term(),
5038                )
5039            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
5040                this = self.expression(
5041                    exp.DPipe,
5042                    this=this,
5043                    expression=self._parse_term(),
5044                    safe=not self.dialect.STRICT_STRING_CONCAT,
5045                )
5046            elif self._match(TokenType.DQMARK):
5047                this = self.expression(
5048                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
5049                )
5050            elif self._match_pair(TokenType.LT, TokenType.LT):
5051                this = self.expression(
5052                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
5053                )
5054            elif self._match_pair(TokenType.GT, TokenType.GT):
5055                this = self.expression(
5056                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
5057                )
5058            else:
5059                break
5060
5061        return this
5062
5063    def _parse_term(self) -> t.Optional[exp.Expression]:
5064        this = self._parse_factor()
5065
5066        while self._match_set(self.TERM):
5067            klass = self.TERM[self._prev.token_type]
5068            comments = self._prev_comments
5069            expression = self._parse_factor()
5070
5071            this = self.expression(klass, this=this, comments=comments, expression=expression)
5072
5073        if isinstance(this, exp.Collate):
5074            expr = this.expression
5075
5076            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
5077            # fall back to Identifier / Var
5078            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
5079                ident = expr.this
5080                if isinstance(ident, exp.Identifier):
5081                    this.set("expression", ident if ident.quoted else exp.var(ident.name))
5082
5083        return this
5084
5085    def _parse_factor(self) -> t.Optional[exp.Expression]:
5086        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
5087        this = parse_method()
5088
5089        while self._match_set(self.FACTOR):
5090            klass = self.FACTOR[self._prev.token_type]
5091            comments = self._prev_comments
5092            expression = parse_method()
5093
5094            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
5095                self._retreat(self._index - 1)
5096                return this
5097
5098            this = self.expression(klass, this=this, comments=comments, expression=expression)
5099
5100        if isinstance(this, exp.Div):
5101            this.args["typed"] = self.dialect.TYPED_DIVISION
5102            this.args["safe"] = self.dialect.SAFE_DIVISION
5103
5104        return this
5105
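    # A minimal usage sketch of the division flags set in _parse_factor above,
    # assuming only the public sqlglot.parse_one API: every parsed exp.Div records
    # the dialect's typed/safe division semantics in its args.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> div = sqlglot.parse_one("SELECT 1 / 2").find(exp.Div)
    #     >>> sorted(k for k in div.args if k in ("safe", "typed"))
    #     ['safe', 'typed']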
5106    def _parse_exponent(self) -> t.Optional[exp.Expression]:
5107        return self._parse_tokens(self._parse_unary, self.EXPONENT)
5108
5109    def _parse_unary(self) -> t.Optional[exp.Expression]:
5110        if self._match_set(self.UNARY_PARSERS):
5111            return self.UNARY_PARSERS[self._prev.token_type](self)
5112        return self._parse_at_time_zone(self._parse_type())
5113
5114    def _parse_type(
5115        self, parse_interval: bool = True, fallback_to_identifier: bool = False
5116    ) -> t.Optional[exp.Expression]:
5117        interval = parse_interval and self._parse_interval()
5118        if interval:
5119            return interval
5120
5121        index = self._index
5122        data_type = self._parse_types(check_func=True, allow_identifiers=False)
5123
5124        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>), e.g.
5125        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
5126        if isinstance(data_type, exp.Cast):
5127            # This constructor can contain ops directly after it, for instance struct unnesting:
5128            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
5129            return self._parse_column_ops(data_type)
5130
5131        if data_type:
5132            index2 = self._index
5133            this = self._parse_primary()
5134
5135            if isinstance(this, exp.Literal):
5136                literal = this.name
5137                this = self._parse_column_ops(this)
5138
5139                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
5140                if parser:
5141                    return parser(self, this, data_type)
5142
5143                if (
5144                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
5145                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
5146                    and TIME_ZONE_RE.search(literal)
5147                ):
5148                    data_type = exp.DataType.build("TIMESTAMPTZ")
5149
5150                return self.expression(exp.Cast, this=this, to=data_type)
5151
5152            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
5153            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
5154            #
5155            # If the index difference here is greater than 1, that means the parser itself must have
5156            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
5157            #
5158            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
5159            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
5160            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
5161            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
5162            #
5163            # In these cases, we don't really want to return the converted type, but instead retreat
5164            # and try to parse a Column or Identifier in the section below.
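            #
            # A hedged illustration using Snowflake's DECIMAL example from above: a
            # TYPE_CONVERTERS callable can set the expressions arg without consuming
            # any extra tokens, so index2 - index == 1, the condition below is false,
            # and we fall through to self._retreat(index), re-parsing the input as a
            # Column or Identifier instead of returning the converted type.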
5165 if data_type.expressions and index2 - index > 1: 5166 self._retreat(index2) 5167 return self._parse_column_ops(data_type) 5168 5169 self._retreat(index) 5170 5171 if fallback_to_identifier: 5172 return self._parse_id_var() 5173 5174 this = self._parse_column() 5175 return this and self._parse_column_ops(this) 5176 5177 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5178 this = self._parse_type() 5179 if not this: 5180 return None 5181 5182 if isinstance(this, exp.Column) and not this.table: 5183 this = exp.var(this.name.upper()) 5184 5185 return self.expression( 5186 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5187 ) 5188 5189 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5190 type_name = identifier.name 5191 5192 while self._match(TokenType.DOT): 5193 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5194 5195 return exp.DataType.build(type_name, udt=True) 5196 5197 def _parse_types( 5198 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5199 ) -> t.Optional[exp.Expression]: 5200 index = self._index 5201 5202 this: t.Optional[exp.Expression] = None 5203 prefix = self._match_text_seq("SYSUDTLIB", ".") 5204 5205 if not self._match_set(self.TYPE_TOKENS): 5206 identifier = allow_identifiers and self._parse_id_var( 5207 any_token=False, tokens=(TokenType.VAR,) 5208 ) 5209 if isinstance(identifier, exp.Identifier): 5210 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5211 5212 if len(tokens) != 1: 5213 self.raise_error("Unexpected identifier", self._prev) 5214 5215 if tokens[0].token_type in self.TYPE_TOKENS: 5216 self._prev = tokens[0] 5217 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5218 this = self._parse_user_defined_type(identifier) 5219 else: 5220 self._retreat(self._index - 1) 5221 return None 5222 else: 5223 return None 5224 5225 type_token = self._prev.token_type 5226 5227 if type_token == TokenType.PSEUDO_TYPE: 5228 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5229 5230 if type_token == TokenType.OBJECT_IDENTIFIER: 5231 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5232 5233 # https://materialize.com/docs/sql/types/map/ 5234 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5235 key_type = self._parse_types( 5236 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5237 ) 5238 if not self._match(TokenType.FARROW): 5239 self._retreat(index) 5240 return None 5241 5242 value_type = self._parse_types( 5243 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5244 ) 5245 if not self._match(TokenType.R_BRACKET): 5246 self._retreat(index) 5247 return None 5248 5249 return exp.DataType( 5250 this=exp.DataType.Type.MAP, 5251 expressions=[key_type, value_type], 5252 nested=True, 5253 prefix=prefix, 5254 ) 5255 5256 nested = type_token in self.NESTED_TYPE_TOKENS 5257 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5258 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5259 expressions = None 5260 maybe_func = False 5261 5262 if self._match(TokenType.L_PAREN): 5263 if is_struct: 5264 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5265 elif nested: 5266 expressions = self._parse_csv( 5267 lambda: self._parse_types( 5268 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5269 ) 5270 ) 5271 if type_token == TokenType.NULLABLE and len(expressions) == 
1: 5272 this = expressions[0] 5273 this.set("nullable", True) 5274 self._match_r_paren() 5275 return this 5276 elif type_token in self.ENUM_TYPE_TOKENS: 5277 expressions = self._parse_csv(self._parse_equality) 5278 elif is_aggregate: 5279 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5280 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5281 ) 5282 if not func_or_ident: 5283 return None 5284 expressions = [func_or_ident] 5285 if self._match(TokenType.COMMA): 5286 expressions.extend( 5287 self._parse_csv( 5288 lambda: self._parse_types( 5289 check_func=check_func, 5290 schema=schema, 5291 allow_identifiers=allow_identifiers, 5292 ) 5293 ) 5294 ) 5295 else: 5296 expressions = self._parse_csv(self._parse_type_size) 5297 5298 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5299 if type_token == TokenType.VECTOR and len(expressions) == 2: 5300 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5301 5302 if not expressions or not self._match(TokenType.R_PAREN): 5303 self._retreat(index) 5304 return None 5305 5306 maybe_func = True 5307 5308 values: t.Optional[t.List[exp.Expression]] = None 5309 5310 if nested and self._match(TokenType.LT): 5311 if is_struct: 5312 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5313 else: 5314 expressions = self._parse_csv( 5315 lambda: self._parse_types( 5316 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5317 ) 5318 ) 5319 5320 if not self._match(TokenType.GT): 5321 self.raise_error("Expecting >") 5322 5323 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5324 values = self._parse_csv(self._parse_assignment) 5325 if not values and is_struct: 5326 values = None 5327 self._retreat(self._index - 1) 5328 else: 5329 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5330 5331 if type_token in self.TIMESTAMPS: 5332 if self._match_text_seq("WITH", "TIME", "ZONE"): 5333 maybe_func = False 5334 tz_type = ( 5335 exp.DataType.Type.TIMETZ 5336 if type_token in self.TIMES 5337 else exp.DataType.Type.TIMESTAMPTZ 5338 ) 5339 this = exp.DataType(this=tz_type, expressions=expressions) 5340 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5341 maybe_func = False 5342 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5343 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5344 maybe_func = False 5345 elif type_token == TokenType.INTERVAL: 5346 unit = self._parse_var(upper=True) 5347 if unit: 5348 if self._match_text_seq("TO"): 5349 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5350 5351 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5352 else: 5353 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5354 elif type_token == TokenType.VOID: 5355 this = exp.DataType(this=exp.DataType.Type.NULL) 5356 5357 if maybe_func and check_func: 5358 index2 = self._index 5359 peek = self._parse_string() 5360 5361 if not peek: 5362 self._retreat(index) 5363 return None 5364 5365 self._retreat(index2) 5366 5367 if not this: 5368 if self._match_text_seq("UNSIGNED"): 5369 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5370 if not unsigned_type_token: 5371 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5372 5373 type_token = unsigned_type_token or type_token 5374 5375 this = exp.DataType( 5376 this=exp.DataType.Type[type_token.value], 5377 expressions=expressions, 
5378 nested=nested, 5379 prefix=prefix, 5380 ) 5381 5382 # Empty arrays/structs are allowed 5383 if values is not None: 5384 cls = exp.Struct if is_struct else exp.Array 5385 this = exp.cast(cls(expressions=values), this, copy=False) 5386 5387 elif expressions: 5388 this.set("expressions", expressions) 5389 5390 # https://materialize.com/docs/sql/types/list/#type-name 5391 while self._match(TokenType.LIST): 5392 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5393 5394 index = self._index 5395 5396 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5397 matched_array = self._match(TokenType.ARRAY) 5398 5399 while self._curr: 5400 datatype_token = self._prev.token_type 5401 matched_l_bracket = self._match(TokenType.L_BRACKET) 5402 5403 if (not matched_l_bracket and not matched_array) or ( 5404 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5405 ): 5406 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5407 # not to be confused with the fixed size array parsing 5408 break 5409 5410 matched_array = False 5411 values = self._parse_csv(self._parse_assignment) or None 5412 if ( 5413 values 5414 and not schema 5415 and ( 5416 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5417 ) 5418 ): 5419 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5420 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5421 self._retreat(index) 5422 break 5423 5424 this = exp.DataType( 5425 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5426 ) 5427 self._match(TokenType.R_BRACKET) 5428 5429 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5430 converter = self.TYPE_CONVERTERS.get(this.this) 5431 if converter: 5432 this = converter(t.cast(exp.DataType, this)) 5433 5434 return this 5435 5436 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5437 index = self._index 5438 5439 if ( 5440 self._curr 5441 and self._next 5442 and self._curr.token_type in self.TYPE_TOKENS 5443 and self._next.token_type in self.TYPE_TOKENS 5444 ): 5445 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5446 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5447 this = self._parse_id_var() 5448 else: 5449 this = ( 5450 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5451 or self._parse_id_var() 5452 ) 5453 5454 self._match(TokenType.COLON) 5455 5456 if ( 5457 type_required 5458 and not isinstance(this, exp.DataType) 5459 and not self._match_set(self.TYPE_TOKENS, advance=False) 5460 ): 5461 self._retreat(index) 5462 return self._parse_types() 5463 5464 return self._parse_column_def(this) 5465 5466 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5467 if not self._match_text_seq("AT", "TIME", "ZONE"): 5468 return this 5469 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5470 5471 def _parse_column(self) -> t.Optional[exp.Expression]: 5472 this = self._parse_column_reference() 5473 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5474 5475 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5476 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5477 5478 return column 5479 5480 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5481 this = self._parse_field() 5482 if ( 5483 not this 5484 and self._match(TokenType.VALUES, advance=False) 5485 and self.VALUES_FOLLOWED_BY_PAREN 5486 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5487 ): 5488 this = self._parse_id_var() 5489 5490 if isinstance(this, exp.Identifier): 5491 # We bubble up comments from the Identifier to the Column 5492 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5493 5494 return this 5495 5496 def _parse_colon_as_variant_extract( 5497 self, this: t.Optional[exp.Expression] 5498 ) -> t.Optional[exp.Expression]: 5499 casts = [] 5500 json_path = [] 5501 escape = None 5502 5503 while self._match(TokenType.COLON): 5504 start_index = self._index 5505 5506 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5507 path = self._parse_column_ops( 5508 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5509 ) 5510 5511 # The cast :: operator has a lower precedence than the extraction operator :, so 5512 # we rearrange the AST appropriately to avoid casting the JSON path 5513 while isinstance(path, exp.Cast): 5514 casts.append(path.to) 5515 path = path.this 5516 5517 if casts: 5518 dcolon_offset = next( 5519 i 5520 for i, t in enumerate(self._tokens[start_index:]) 5521 if t.token_type == TokenType.DCOLON 5522 ) 5523 end_token = self._tokens[start_index + dcolon_offset - 1] 5524 else: 5525 end_token = self._prev 5526 5527 if path: 5528 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5529 # it'll roundtrip to a string literal in GET_PATH 5530 if isinstance(path, exp.Identifier) and path.quoted: 5531 escape = True 5532 5533 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5534 5535 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5536 # Databricks transforms it back to the colon/dot notation 5537 if json_path: 5538 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5539 5540 if json_path_expr: 5541 json_path_expr.set("escape", escape) 5542 5543 this = self.expression( 5544 exp.JSONExtract, 5545 this=this, 5546 expression=json_path_expr, 5547 variant_extract=True, 5548 ) 5549 5550 while casts: 5551 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5552 5553 return this 5554 5555 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5556 return self._parse_types() 5557 5558 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5559 this = self._parse_bracket(this) 5560 5561 while self._match_set(self.COLUMN_OPERATORS): 5562 op_token = self._prev.token_type 5563 op = self.COLUMN_OPERATORS.get(op_token) 5564 5565 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5566 field = self._parse_dcolon() 5567 if not field: 5568 self.raise_error("Expected type") 5569 elif op and self._curr: 5570 field = self._parse_column_reference() or self._parse_bracket() 5571 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5572 field = self._parse_column_ops(field) 5573 else: 5574 field = self._parse_field(any_token=True, anonymous_func=True) 5575 5576 # Function calls can be qualified, e.g., x.y.FOO() 5577 # This converts the final AST to a series of Dots leading to the function call 5578 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5579 if isinstance(field, (exp.Func, exp.Window)) and this: 5580 this = this.transform( 5581 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5582 ) 5583 5584 if op: 5585 this = op(self, this, field) 5586 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5587 this = self.expression( 5588 exp.Column, 5589 comments=this.comments, 5590 this=field, 5591 table=this.this, 5592 db=this.args.get("table"), 5593 catalog=this.args.get("db"), 5594 ) 5595 elif isinstance(field, exp.Window): 5596 # Move the exp.Dot's to the window's function 5597 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5598 field.set("this", window_func) 5599 this = field 5600 else: 5601 this = self.expression(exp.Dot, this=this, expression=field) 5602 5603 if field and field.comments: 5604 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5605 5606 this = self._parse_bracket(this) 5607 5608 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5609 5610 def _parse_paren(self) -> t.Optional[exp.Expression]: 5611 if not self._match(TokenType.L_PAREN): 5612 return None 5613 5614 comments = self._prev_comments 5615 query = self._parse_select() 5616 5617 if query: 5618 expressions = [query] 5619 else: 5620 expressions = self._parse_expressions() 5621 5622 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5623 5624 if not this and self._match(TokenType.R_PAREN, advance=False): 5625 this = self.expression(exp.Tuple) 5626 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5627 this = 
self._parse_subquery(this=this, parse_alias=False) 5628 elif isinstance(this, exp.Subquery): 5629 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5630 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5631 this = self.expression(exp.Tuple, expressions=expressions) 5632 else: 5633 this = self.expression(exp.Paren, this=this) 5634 5635 if this: 5636 this.add_comments(comments) 5637 5638 self._match_r_paren(expression=this) 5639 return this 5640 5641 def _parse_primary(self) -> t.Optional[exp.Expression]: 5642 if self._match_set(self.PRIMARY_PARSERS): 5643 token_type = self._prev.token_type 5644 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5645 5646 if token_type == TokenType.STRING: 5647 expressions = [primary] 5648 while self._match(TokenType.STRING): 5649 expressions.append(exp.Literal.string(self._prev.text)) 5650 5651 if len(expressions) > 1: 5652 return self.expression(exp.Concat, expressions=expressions) 5653 5654 return primary 5655 5656 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5657 return exp.Literal.number(f"0.{self._prev.text}") 5658 5659 return self._parse_paren() 5660 5661 def _parse_field( 5662 self, 5663 any_token: bool = False, 5664 tokens: t.Optional[t.Collection[TokenType]] = None, 5665 anonymous_func: bool = False, 5666 ) -> t.Optional[exp.Expression]: 5667 if anonymous_func: 5668 field = ( 5669 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5670 or self._parse_primary() 5671 ) 5672 else: 5673 field = self._parse_primary() or self._parse_function( 5674 anonymous=anonymous_func, any_token=any_token 5675 ) 5676 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5677 5678 def _parse_function( 5679 self, 5680 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5681 anonymous: bool = False, 5682 optional_parens: bool = True, 5683 any_token: bool = False, 5684 ) -> t.Optional[exp.Expression]: 5685 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5686 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5687 fn_syntax = False 5688 if ( 5689 self._match(TokenType.L_BRACE, advance=False) 5690 and self._next 5691 and self._next.text.upper() == "FN" 5692 ): 5693 self._advance(2) 5694 fn_syntax = True 5695 5696 func = self._parse_function_call( 5697 functions=functions, 5698 anonymous=anonymous, 5699 optional_parens=optional_parens, 5700 any_token=any_token, 5701 ) 5702 5703 if fn_syntax: 5704 self._match(TokenType.R_BRACE) 5705 5706 return func 5707 5708 def _parse_function_call( 5709 self, 5710 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5711 anonymous: bool = False, 5712 optional_parens: bool = True, 5713 any_token: bool = False, 5714 ) -> t.Optional[exp.Expression]: 5715 if not self._curr: 5716 return None 5717 5718 comments = self._curr.comments 5719 token = self._curr 5720 token_type = self._curr.token_type 5721 this = self._curr.text 5722 upper = this.upper() 5723 5724 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5725 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5726 self._advance() 5727 return self._parse_window(parser(self)) 5728 5729 if not self._next or self._next.token_type != TokenType.L_PAREN: 5730 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5731 self._advance() 5732 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5733 5734 return None 5735 5736 if any_token: 5737 if token_type in self.RESERVED_TOKENS: 
5738 return None 5739 elif token_type not in self.FUNC_TOKENS: 5740 return None 5741 5742 self._advance(2) 5743 5744 parser = self.FUNCTION_PARSERS.get(upper) 5745 if parser and not anonymous: 5746 this = parser(self) 5747 else: 5748 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5749 5750 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5751 this = self.expression( 5752 subquery_predicate, comments=comments, this=self._parse_select() 5753 ) 5754 self._match_r_paren() 5755 return this 5756 5757 if functions is None: 5758 functions = self.FUNCTIONS 5759 5760 function = functions.get(upper) 5761 known_function = function and not anonymous 5762 5763 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5764 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5765 5766 post_func_comments = self._curr and self._curr.comments 5767 if known_function and post_func_comments: 5768 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5769 # call we'll construct it as exp.Anonymous, even if it's "known" 5770 if any( 5771 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5772 for comment in post_func_comments 5773 ): 5774 known_function = False 5775 5776 if alias and known_function: 5777 args = self._kv_to_prop_eq(args) 5778 5779 if known_function: 5780 func_builder = t.cast(t.Callable, function) 5781 5782 if "dialect" in func_builder.__code__.co_varnames: 5783 func = func_builder(args, dialect=self.dialect) 5784 else: 5785 func = func_builder(args) 5786 5787 func = self.validate_expression(func, args) 5788 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5789 func.meta["name"] = this 5790 5791 this = func 5792 else: 5793 if token_type == TokenType.IDENTIFIER: 5794 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5795 5796 this = self.expression(exp.Anonymous, this=this, expressions=args) 5797 this = this.update_positions(token) 5798 5799 if isinstance(this, exp.Expression): 5800 this.add_comments(comments) 5801 5802 self._match_r_paren(this) 5803 return self._parse_window(this) 5804 5805 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5806 return expression 5807 5808 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5809 transformed = [] 5810 5811 for index, e in enumerate(expressions): 5812 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5813 if isinstance(e, exp.Alias): 5814 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5815 5816 if not isinstance(e, exp.PropertyEQ): 5817 e = self.expression( 5818 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5819 ) 5820 5821 if isinstance(e.this, exp.Column): 5822 e.this.replace(e.this.this) 5823 else: 5824 e = self._to_prop_eq(e, index) 5825 5826 transformed.append(e) 5827 5828 return transformed 5829 5830 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5831 return self._parse_statement() 5832 5833 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5834 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5835 5836 def _parse_user_defined_function( 5837 self, kind: t.Optional[TokenType] = None 5838 ) -> t.Optional[exp.Expression]: 5839 this = self._parse_table_parts(schema=True) 5840 5841 if not self._match(TokenType.L_PAREN): 5842 return this 5843 5844 expressions = self._parse_csv(self._parse_function_parameter) 5845 
self._match_r_paren() 5846 return self.expression( 5847 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5848 ) 5849 5850 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5851 literal = self._parse_primary() 5852 if literal: 5853 return self.expression(exp.Introducer, this=token.text, expression=literal) 5854 5855 return self._identifier_expression(token) 5856 5857 def _parse_session_parameter(self) -> exp.SessionParameter: 5858 kind = None 5859 this = self._parse_id_var() or self._parse_primary() 5860 5861 if this and self._match(TokenType.DOT): 5862 kind = this.name 5863 this = self._parse_var() or self._parse_primary() 5864 5865 return self.expression(exp.SessionParameter, this=this, kind=kind) 5866 5867 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5868 return self._parse_id_var() 5869 5870 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5871 index = self._index 5872 5873 if self._match(TokenType.L_PAREN): 5874 expressions = t.cast( 5875 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5876 ) 5877 5878 if not self._match(TokenType.R_PAREN): 5879 self._retreat(index) 5880 else: 5881 expressions = [self._parse_lambda_arg()] 5882 5883 if self._match_set(self.LAMBDAS): 5884 return self.LAMBDAS[self._prev.token_type](self, expressions) 5885 5886 self._retreat(index) 5887 5888 this: t.Optional[exp.Expression] 5889 5890 if self._match(TokenType.DISTINCT): 5891 this = self.expression( 5892 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5893 ) 5894 else: 5895 this = self._parse_select_or_expression(alias=alias) 5896 5897 return self._parse_limit( 5898 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5899 ) 5900 5901 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5902 index = self._index 5903 if not self._match(TokenType.L_PAREN): 5904 return this 5905 5906 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5907 # expr can be of both types 5908 if self._match_set(self.SELECT_START_TOKENS): 5909 self._retreat(index) 5910 return this 5911 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5912 self._match_r_paren() 5913 return self.expression(exp.Schema, this=this, expressions=args) 5914 5915 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5916 return self._parse_column_def(self._parse_field(any_token=True)) 5917 5918 def _parse_column_def( 5919 self, this: t.Optional[exp.Expression], computed_column: bool = True 5920 ) -> t.Optional[exp.Expression]: 5921 # column defs are not really columns, they're identifiers 5922 if isinstance(this, exp.Column): 5923 this = this.this 5924 5925 if not computed_column: 5926 self._match(TokenType.ALIAS) 5927 5928 kind = self._parse_types(schema=True) 5929 5930 if self._match_text_seq("FOR", "ORDINALITY"): 5931 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5932 5933 constraints: t.List[exp.Expression] = [] 5934 5935 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5936 ("ALIAS", "MATERIALIZED") 5937 ): 5938 persisted = self._prev.text.upper() == "MATERIALIZED" 5939 constraint_kind = exp.ComputedColumnConstraint( 5940 this=self._parse_assignment(), 5941 persisted=persisted or self._match_text_seq("PERSISTED"), 5942 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5943 ) 5944 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5945 elif ( 5946 kind 5947 and self._match(TokenType.ALIAS, advance=False) 5948 and ( 5949 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5950 or (self._next and self._next.token_type == TokenType.L_PAREN) 5951 ) 5952 ): 5953 self._advance() 5954 constraints.append( 5955 self.expression( 5956 exp.ColumnConstraint, 5957 kind=exp.ComputedColumnConstraint( 5958 this=self._parse_disjunction(), 5959 persisted=self._match_texts(("STORED", "VIRTUAL")) 5960 and self._prev.text.upper() == "STORED", 5961 ), 5962 ) 5963 ) 5964 5965 while True: 5966 constraint = self._parse_column_constraint() 5967 if not constraint: 5968 break 5969 constraints.append(constraint) 5970 5971 if not kind and not constraints: 5972 return this 5973 5974 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5975 5976 def _parse_auto_increment( 5977 self, 5978 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5979 start = None 5980 increment = None 5981 order = None 5982 5983 if self._match(TokenType.L_PAREN, advance=False): 5984 args = self._parse_wrapped_csv(self._parse_bitwise) 5985 start = seq_get(args, 0) 5986 increment = seq_get(args, 1) 5987 elif self._match_text_seq("START"): 5988 start = self._parse_bitwise() 5989 self._match_text_seq("INCREMENT") 5990 increment = self._parse_bitwise() 5991 if self._match_text_seq("ORDER"): 5992 order = True 5993 elif self._match_text_seq("NOORDER"): 5994 order = False 5995 5996 if start and increment: 5997 return exp.GeneratedAsIdentityColumnConstraint( 5998 start=start, increment=increment, this=False, order=order 5999 ) 6000 6001 return exp.AutoIncrementColumnConstraint() 6002 6003 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6004 if not self._match_text_seq("REFRESH"): 6005 self._retreat(self._index - 1) 6006 return None 6007 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6008 6009 def _parse_compress(self) -> exp.CompressColumnConstraint: 6010 if 
self._match(TokenType.L_PAREN, advance=False): 6011 return self.expression( 6012 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6013 ) 6014 6015 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6016 6017 def _parse_generated_as_identity( 6018 self, 6019 ) -> ( 6020 exp.GeneratedAsIdentityColumnConstraint 6021 | exp.ComputedColumnConstraint 6022 | exp.GeneratedAsRowColumnConstraint 6023 ): 6024 if self._match_text_seq("BY", "DEFAULT"): 6025 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6026 this = self.expression( 6027 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6028 ) 6029 else: 6030 self._match_text_seq("ALWAYS") 6031 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6032 6033 self._match(TokenType.ALIAS) 6034 6035 if self._match_text_seq("ROW"): 6036 start = self._match_text_seq("START") 6037 if not start: 6038 self._match(TokenType.END) 6039 hidden = self._match_text_seq("HIDDEN") 6040 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6041 6042 identity = self._match_text_seq("IDENTITY") 6043 6044 if self._match(TokenType.L_PAREN): 6045 if self._match(TokenType.START_WITH): 6046 this.set("start", self._parse_bitwise()) 6047 if self._match_text_seq("INCREMENT", "BY"): 6048 this.set("increment", self._parse_bitwise()) 6049 if self._match_text_seq("MINVALUE"): 6050 this.set("minvalue", self._parse_bitwise()) 6051 if self._match_text_seq("MAXVALUE"): 6052 this.set("maxvalue", self._parse_bitwise()) 6053 6054 if self._match_text_seq("CYCLE"): 6055 this.set("cycle", True) 6056 elif self._match_text_seq("NO", "CYCLE"): 6057 this.set("cycle", False) 6058 6059 if not identity: 6060 this.set("expression", self._parse_range()) 6061 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6062 args = self._parse_csv(self._parse_bitwise) 6063 this.set("start", seq_get(args, 0)) 6064 this.set("increment", seq_get(args, 1)) 6065 6066 self._match_r_paren() 6067 6068 return this 6069 6070 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6071 self._match_text_seq("LENGTH") 6072 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6073 6074 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6075 if self._match_text_seq("NULL"): 6076 return self.expression(exp.NotNullColumnConstraint) 6077 if self._match_text_seq("CASESPECIFIC"): 6078 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6079 if self._match_text_seq("FOR", "REPLICATION"): 6080 return self.expression(exp.NotForReplicationColumnConstraint) 6081 6082 # Unconsume the `NOT` token 6083 self._retreat(self._index - 1) 6084 return None 6085 6086 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6087 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6088 6089 procedure_option_follows = ( 6090 self._match(TokenType.WITH, advance=False) 6091 and self._next 6092 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6093 ) 6094 6095 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6096 return self.expression( 6097 exp.ColumnConstraint, 6098 this=this, 6099 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6100 ) 6101 6102 return this 6103 6104 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6105 if not self._match(TokenType.CONSTRAINT): 6106 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6107 6108 return self.expression( 6109 exp.Constraint, 6110 this=self._parse_id_var(), 6111 expressions=self._parse_unnamed_constraints(), 6112 ) 6113 6114 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6115 constraints = [] 6116 while True: 6117 constraint = self._parse_unnamed_constraint() or self._parse_function() 6118 if not constraint: 6119 break 6120 constraints.append(constraint) 6121 6122 return constraints 6123 6124 def _parse_unnamed_constraint( 6125 self, constraints: t.Optional[t.Collection[str]] = None 6126 ) -> t.Optional[exp.Expression]: 6127 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6128 constraints or self.CONSTRAINT_PARSERS 6129 ): 6130 return None 6131 6132 constraint = self._prev.text.upper() 6133 if constraint not in self.CONSTRAINT_PARSERS: 6134 self.raise_error(f"No parser found for schema constraint {constraint}.") 6135 6136 return self.CONSTRAINT_PARSERS[constraint](self) 6137 6138 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6139 return self._parse_id_var(any_token=False) 6140 6141 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6142 self._match_text_seq("KEY") 6143 return self.expression( 6144 exp.UniqueColumnConstraint, 6145 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6146 this=self._parse_schema(self._parse_unique_key()), 6147 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6148 on_conflict=self._parse_on_conflict(), 6149 options=self._parse_key_constraint_options(), 6150 ) 6151 6152 def _parse_key_constraint_options(self) -> t.List[str]: 6153 options = [] 6154 while True: 6155 if not self._curr: 6156 break 6157 6158 if self._match(TokenType.ON): 6159 action = None 6160 on = self._advance_any() and self._prev.text 6161 6162 if self._match_text_seq("NO", "ACTION"): 6163 action = "NO ACTION" 6164 elif self._match_text_seq("CASCADE"): 6165 action = "CASCADE" 6166 elif self._match_text_seq("RESTRICT"): 6167 action = "RESTRICT" 6168 elif self._match_pair(TokenType.SET, TokenType.NULL): 6169 action = "SET NULL" 6170 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6171 action = "SET DEFAULT" 6172 else: 6173 self.raise_error("Invalid key constraint") 6174 6175 options.append(f"ON {on} {action}") 6176 else: 6177 var = self._parse_var_from_options( 6178 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6179 ) 6180 if not var: 6181 break 6182 options.append(var.name) 6183 6184 return options 6185 6186 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6187 if match and not self._match(TokenType.REFERENCES): 6188 return None 6189 6190 expressions = None 6191 this = self._parse_table(schema=True) 6192 options = self._parse_key_constraint_options() 6193 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6194 6195 def _parse_foreign_key(self) -> exp.ForeignKey: 6196 expressions = ( 6197 self._parse_wrapped_id_vars() 6198 if not self._match(TokenType.REFERENCES, advance=False) 6199 else None 6200 ) 6201 reference = self._parse_references() 6202 on_options = {} 6203 6204 while self._match(TokenType.ON): 6205 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6206 self.raise_error("Expected DELETE or UPDATE") 6207 6208 kind = self._prev.text.lower() 6209 6210 if self._match_text_seq("NO", "ACTION"): 6211 action = "NO ACTION" 6212 elif self._match(TokenType.SET): 6213 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6214 action = "SET " + self._prev.text.upper() 6215 else: 6216 self._advance() 6217 action = self._prev.text.upper() 6218 6219 on_options[kind] = action 6220 6221 return self.expression( 6222 exp.ForeignKey, 6223 expressions=expressions, 6224 reference=reference, 6225 options=self._parse_key_constraint_options(), 6226 **on_options, # type: ignore 6227 ) 6228 6229 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6230 return self._parse_ordered() or self._parse_field() 6231 6232 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6233 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6234 self._retreat(self._index - 1) 6235 return None 6236 6237 id_vars = self._parse_wrapped_id_vars() 6238 return self.expression( 6239 exp.PeriodForSystemTimeConstraint, 6240 this=seq_get(id_vars, 0), 6241 expression=seq_get(id_vars, 1), 6242 ) 6243 6244 def _parse_primary_key( 6245 self, wrapped_optional: bool = False, in_props: bool = False 6246 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6247 desc = ( 6248 self._match_set((TokenType.ASC, TokenType.DESC)) 6249 and self._prev.token_type == TokenType.DESC 6250 ) 6251 6252 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6253 return self.expression( 6254 exp.PrimaryKeyColumnConstraint, 6255 desc=desc, 6256 options=self._parse_key_constraint_options(), 6257 ) 6258 6259 expressions = self._parse_wrapped_csv( 6260 self._parse_primary_key_part, optional=wrapped_optional 6261 ) 6262 options = self._parse_key_constraint_options() 6263 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6264 6265 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6266 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6267 6268 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6269 """ 6270 Parses a datetime column in ODBC format. We parse the column into the corresponding 6271 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6272 same as we did for `DATE('yyyy-mm-dd')`. 
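
        Example (an illustrative sketch; the generated SQL can vary by dialect
        and sqlglot version):

            >>> import sqlglot
            >>> sqlglot.parse_one("SELECT {d '2024-01-01'}").sql()
            "SELECT DATE('2024-01-01')"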
6273 6274 Reference: 6275 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6276 """ 6277 self._match(TokenType.VAR) 6278 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6279 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6280 if not self._match(TokenType.R_BRACE): 6281 self.raise_error("Expected }") 6282 return expression 6283 6284 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6285 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6286 return this 6287 6288 bracket_kind = self._prev.token_type 6289 if ( 6290 bracket_kind == TokenType.L_BRACE 6291 and self._curr 6292 and self._curr.token_type == TokenType.VAR 6293 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6294 ): 6295 return self._parse_odbc_datetime_literal() 6296 6297 expressions = self._parse_csv( 6298 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6299 ) 6300 6301 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6302 self.raise_error("Expected ]") 6303 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6304 self.raise_error("Expected }") 6305 6306 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6307 if bracket_kind == TokenType.L_BRACE: 6308 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6309 elif not this: 6310 this = build_array_constructor( 6311 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6312 ) 6313 else: 6314 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6315 if constructor_type: 6316 return build_array_constructor( 6317 constructor_type, 6318 args=expressions, 6319 bracket_kind=bracket_kind, 6320 dialect=self.dialect, 6321 ) 6322 6323 expressions = apply_index_offset( 6324 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6325 ) 6326 this = self.expression( 6327 exp.Bracket, 6328 this=this, 6329 expressions=expressions, 6330 comments=this.pop_comments(), 6331 ) 6332 6333 self._add_comments(this) 6334 return self._parse_bracket(this) 6335 6336 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6337 if self._match(TokenType.COLON): 6338 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6339 return this 6340 6341 def _parse_case(self) -> t.Optional[exp.Expression]: 6342 ifs = [] 6343 default = None 6344 6345 comments = self._prev_comments 6346 expression = self._parse_assignment() 6347 6348 while self._match(TokenType.WHEN): 6349 this = self._parse_assignment() 6350 self._match(TokenType.THEN) 6351 then = self._parse_assignment() 6352 ifs.append(self.expression(exp.If, this=this, true=then)) 6353 6354 if self._match(TokenType.ELSE): 6355 default = self._parse_assignment() 6356 6357 if not self._match(TokenType.END): 6358 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6359 default = exp.column("interval") 6360 else: 6361 self.raise_error("Expected END after CASE", self._prev) 6362 6363 return self.expression( 6364 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6365 ) 6366 6367 def _parse_if(self) -> t.Optional[exp.Expression]: 6368 if self._match(TokenType.L_PAREN): 6369 args = self._parse_csv( 6370 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6371 ) 6372 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6373 self._match_r_paren() 6374 else: 6375 index = self._index - 1 6376 6377 if self.NO_PAREN_IF_COMMANDS and index == 0: 6378 return self._parse_as_command(self._prev) 6379 6380 condition = self._parse_assignment() 6381 6382 if not condition: 6383 self._retreat(index) 6384 return None 6385 6386 self._match(TokenType.THEN) 6387 true = self._parse_assignment() 6388 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6389 self._match(TokenType.END) 6390 this = self.expression(exp.If, this=condition, true=true, false=false) 6391 6392 return this 6393 6394 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6395 if not self._match_text_seq("VALUE", "FOR"): 6396 self._retreat(self._index - 1) 6397 return None 6398 6399 return self.expression( 6400 exp.NextValueFor, 6401 this=self._parse_column(), 6402 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6403 ) 6404 6405 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6406 this = self._parse_function() or self._parse_var_or_string(upper=True) 6407 6408 if self._match(TokenType.FROM): 6409 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6410 6411 if not self._match(TokenType.COMMA): 6412 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6413 6414 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6415 6416 def _parse_gap_fill(self) -> exp.GapFill: 6417 self._match(TokenType.TABLE) 6418 this = self._parse_table() 6419 6420 self._match(TokenType.COMMA) 6421 args = [this, *self._parse_csv(self._parse_lambda)] 6422 6423 gap_fill = exp.GapFill.from_arg_list(args) 6424 return self.validate_expression(gap_fill, args) 6425 6426 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6427 this = self._parse_assignment() 6428 6429 if not self._match(TokenType.ALIAS): 6430 if self._match(TokenType.COMMA): 6431 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6432 6433 self.raise_error("Expected AS after CAST") 6434 6435 fmt = None 6436 to = self._parse_types() 6437 6438 default = self._match(TokenType.DEFAULT) 6439 if default: 6440 default = self._parse_bitwise() 6441 self._match_text_seq("ON", "CONVERSION", "ERROR") 6442 6443 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6444 fmt_string = self._parse_string() 6445 fmt = self._parse_at_time_zone(fmt_string) 6446 6447 if not to: 6448 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6449 if to.this in exp.DataType.TEMPORAL_TYPES: 6450 this = self.expression( 6451 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6452 this=this, 6453 format=exp.Literal.string( 6454 format_time( 6455 fmt_string.this if fmt_string else "", 6456 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6457 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6458 ) 6459 ), 6460 safe=safe, 6461 ) 6462 6463 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6464 this.set("zone", fmt.args["zone"]) 6465 return this 6466 elif not to: 6467 self.raise_error("Expected TYPE after CAST") 6468 elif isinstance(to, exp.Identifier): 6469 to = exp.DataType.build(to.name, udt=True) 6470 elif to.this == exp.DataType.Type.CHAR: 6471 if self._match(TokenType.CHARACTER_SET): 6472 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6473 6474 return self.expression( 6475 exp.Cast if strict else exp.TryCast, 6476 
this=this, 6477 to=to, 6478 format=fmt, 6479 safe=safe, 6480 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6481 default=default, 6482 ) 6483 6484 def _parse_string_agg(self) -> exp.GroupConcat: 6485 if self._match(TokenType.DISTINCT): 6486 args: t.List[t.Optional[exp.Expression]] = [ 6487 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6488 ] 6489 if self._match(TokenType.COMMA): 6490 args.extend(self._parse_csv(self._parse_assignment)) 6491 else: 6492 args = self._parse_csv(self._parse_assignment) # type: ignore 6493 6494 if self._match_text_seq("ON", "OVERFLOW"): 6495 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6496 if self._match_text_seq("ERROR"): 6497 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6498 else: 6499 self._match_text_seq("TRUNCATE") 6500 on_overflow = self.expression( 6501 exp.OverflowTruncateBehavior, 6502 this=self._parse_string(), 6503 with_count=( 6504 self._match_text_seq("WITH", "COUNT") 6505 or not self._match_text_seq("WITHOUT", "COUNT") 6506 ), 6507 ) 6508 else: 6509 on_overflow = None 6510 6511 index = self._index 6512 if not self._match(TokenType.R_PAREN) and args: 6513 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6514 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6515 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6516 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6517 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6518 6519 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6520 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6521 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
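        # Illustrative inputs that this canonicalization unifies (assumed examples):
        #   Postgres: STRING_AGG(x, ',' ORDER BY y)
        #   Oracle:   LISTAGG(x, ',') WITHIN GROUP (ORDER BY y)
        # Both yield a GroupConcat whose `this` carries the ORDER BY, so a target like
        # MySQL's GROUP_CONCAT(x ORDER BY y SEPARATOR ',') can be generated from either.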
6522 if not self._match_text_seq("WITHIN", "GROUP"): 6523 self._retreat(index) 6524 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6525 6526 # The corresponding match_r_paren will be called in parse_function (caller) 6527 self._match_l_paren() 6528 6529 return self.expression( 6530 exp.GroupConcat, 6531 this=self._parse_order(this=seq_get(args, 0)), 6532 separator=seq_get(args, 1), 6533 on_overflow=on_overflow, 6534 ) 6535 6536 def _parse_convert( 6537 self, strict: bool, safe: t.Optional[bool] = None 6538 ) -> t.Optional[exp.Expression]: 6539 this = self._parse_bitwise() 6540 6541 if self._match(TokenType.USING): 6542 to: t.Optional[exp.Expression] = self.expression( 6543 exp.CharacterSet, this=self._parse_var() 6544 ) 6545 elif self._match(TokenType.COMMA): 6546 to = self._parse_types() 6547 else: 6548 to = None 6549 6550 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6551 6552 def _parse_xml_table(self) -> exp.XMLTable: 6553 namespaces = None 6554 passing = None 6555 columns = None 6556 6557 if self._match_text_seq("XMLNAMESPACES", "("): 6558 namespaces = self._parse_xml_namespace() 6559 self._match_text_seq(")", ",") 6560 6561 this = self._parse_string() 6562 6563 if self._match_text_seq("PASSING"): 6564 # The BY VALUE keywords are optional and are provided for semantic clarity 6565 self._match_text_seq("BY", "VALUE") 6566 passing = self._parse_csv(self._parse_column) 6567 6568 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6569 6570 if self._match_text_seq("COLUMNS"): 6571 columns = self._parse_csv(self._parse_field_def) 6572 6573 return self.expression( 6574 exp.XMLTable, 6575 this=this, 6576 namespaces=namespaces, 6577 passing=passing, 6578 columns=columns, 6579 by_ref=by_ref, 6580 ) 6581 6582 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6583 namespaces = [] 6584 6585 while True: 6586 if self._match(TokenType.DEFAULT): 6587 uri = self._parse_string() 6588 else: 6589 uri = self._parse_alias(self._parse_string()) 6590 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6591 if not self._match(TokenType.COMMA): 6592 break 6593 6594 return namespaces 6595 6596 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6597 """ 6598 There are generally two variants of the DECODE function: 6599 6600 - DECODE(bin, charset) 6601 - DECODE(expression, search, result [, search, result] ... [, default]) 6602 6603 The second variant will always be parsed into a CASE expression. Note that NULL 6604 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6605 instead of relying on pattern matching. 
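
        Example (an illustrative sketch; output shown for the default dialect and
        may vary by version):

            >>> import sqlglot
            >>> sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other')").sql()
            "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END"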
6606 """ 6607 args = self._parse_csv(self._parse_assignment) 6608 6609 if len(args) < 3: 6610 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6611 6612 expression, *expressions = args 6613 if not expression: 6614 return None 6615 6616 ifs = [] 6617 for search, result in zip(expressions[::2], expressions[1::2]): 6618 if not search or not result: 6619 return None 6620 6621 if isinstance(search, exp.Literal): 6622 ifs.append( 6623 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6624 ) 6625 elif isinstance(search, exp.Null): 6626 ifs.append( 6627 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6628 ) 6629 else: 6630 cond = exp.or_( 6631 exp.EQ(this=expression.copy(), expression=search), 6632 exp.and_( 6633 exp.Is(this=expression.copy(), expression=exp.Null()), 6634 exp.Is(this=search.copy(), expression=exp.Null()), 6635 copy=False, 6636 ), 6637 copy=False, 6638 ) 6639 ifs.append(exp.If(this=cond, true=result)) 6640 6641 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6642 6643 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6644 self._match_text_seq("KEY") 6645 key = self._parse_column() 6646 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6647 self._match_text_seq("VALUE") 6648 value = self._parse_bitwise() 6649 6650 if not key and not value: 6651 return None 6652 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6653 6654 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6655 if not this or not self._match_text_seq("FORMAT", "JSON"): 6656 return this 6657 6658 return self.expression(exp.FormatJson, this=this) 6659 6660 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6661 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6662 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6663 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6664 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6665 else: 6666 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6667 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6668 6669 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6670 6671 if not empty and not error and not null: 6672 return None 6673 6674 return self.expression( 6675 exp.OnCondition, 6676 empty=empty, 6677 error=error, 6678 null=null, 6679 ) 6680 6681 def _parse_on_handling( 6682 self, on: str, *values: str 6683 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6684 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6685 for value in values: 6686 if self._match_text_seq(value, "ON", on): 6687 return f"{value} ON {on}" 6688 6689 index = self._index 6690 if self._match(TokenType.DEFAULT): 6691 default_value = self._parse_bitwise() 6692 if self._match_text_seq("ON", on): 6693 return default_value 6694 6695 self._retreat(index) 6696 6697 return None 6698 6699 @t.overload 6700 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6701 6702 @t.overload 6703 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6704 6705 def _parse_json_object(self, agg=False): 6706 star = self._parse_star() 6707 expressions = ( 6708 [star] 6709 if star 6710 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6711 ) 6712 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6713 6714 unique_keys = None 6715 if self._match_text_seq("WITH", "UNIQUE"): 6716 unique_keys = True 6717 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6718 unique_keys = False 6719 6720 self._match_text_seq("KEYS") 6721 6722 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6723 self._parse_type() 6724 ) 6725 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6726 6727 return self.expression( 6728 exp.JSONObjectAgg if agg else exp.JSONObject, 6729 expressions=expressions, 6730 null_handling=null_handling, 6731 unique_keys=unique_keys, 6732 return_type=return_type, 6733 encoding=encoding, 6734 ) 6735 6736 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6737 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6738 if not self._match_text_seq("NESTED"): 6739 this = self._parse_id_var() 6740 kind = self._parse_types(allow_identifiers=False) 6741 nested = None 6742 else: 6743 this = None 6744 kind = None 6745 nested = True 6746 6747 path = self._match_text_seq("PATH") and self._parse_string() 6748 nested_schema = nested and self._parse_json_schema() 6749 6750 return self.expression( 6751 exp.JSONColumnDef, 6752 this=this, 6753 kind=kind, 6754 path=path, 6755 nested_schema=nested_schema, 6756 ) 6757 6758 def _parse_json_schema(self) -> exp.JSONSchema: 6759 self._match_text_seq("COLUMNS") 6760 return self.expression( 6761 exp.JSONSchema, 6762 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6763 ) 6764 6765 def _parse_json_table(self) -> exp.JSONTable: 6766 this = self._parse_format_json(self._parse_bitwise()) 6767 path = self._match(TokenType.COMMA) and self._parse_string() 6768 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6769 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6770 schema = self._parse_json_schema() 6771 6772 return exp.JSONTable( 6773 this=this, 6774 schema=schema, 6775 path=path, 6776 error_handling=error_handling, 6777 empty_handling=empty_handling, 6778 ) 6779 6780 def _parse_match_against(self) -> exp.MatchAgainst: 6781 expressions = self._parse_csv(self._parse_column) 6782 6783 self._match_text_seq(")", "AGAINST", "(") 6784 6785 this = self._parse_string() 6786 6787 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6788 modifier = "IN NATURAL LANGUAGE MODE" 6789 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6790 modifier = f"{modifier} WITH QUERY EXPANSION" 6791 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6792 modifier = "IN BOOLEAN MODE" 6793 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6794 modifier = "WITH QUERY EXPANSION" 6795 else: 6796 modifier = None 6797 6798 return self.expression( 6799 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6800 ) 6801 6802 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6803 def _parse_open_json(self) -> exp.OpenJSON: 6804 this = self._parse_bitwise() 6805 path = self._match(TokenType.COMMA) and self._parse_string() 6806 6807 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6808 this = self._parse_field(any_token=True) 6809 kind = self._parse_types() 6810 path = 
self._parse_string() 6811 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6812 6813 return self.expression( 6814 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6815 ) 6816 6817 expressions = None 6818 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6819 self._match_l_paren() 6820 expressions = self._parse_csv(_parse_open_json_column_def) 6821 6822 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6823 6824 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6825 args = self._parse_csv(self._parse_bitwise) 6826 6827 if self._match(TokenType.IN): 6828 return self.expression( 6829 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6830 ) 6831 6832 if haystack_first: 6833 haystack = seq_get(args, 0) 6834 needle = seq_get(args, 1) 6835 else: 6836 haystack = seq_get(args, 1) 6837 needle = seq_get(args, 0) 6838 6839 return self.expression( 6840 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6841 ) 6842 6843 def _parse_predict(self) -> exp.Predict: 6844 self._match_text_seq("MODEL") 6845 this = self._parse_table() 6846 6847 self._match(TokenType.COMMA) 6848 self._match_text_seq("TABLE") 6849 6850 return self.expression( 6851 exp.Predict, 6852 this=this, 6853 expression=self._parse_table(), 6854 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6855 ) 6856 6857 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6858 args = self._parse_csv(self._parse_table) 6859 return exp.JoinHint(this=func_name.upper(), expressions=args) 6860 6861 def _parse_substring(self) -> exp.Substring: 6862 # Postgres supports the form: substring(string [from int] [for int]) 6863 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6864 6865 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6866 6867 if self._match(TokenType.FROM): 6868 args.append(self._parse_bitwise()) 6869 if self._match(TokenType.FOR): 6870 if len(args) == 1: 6871 args.append(exp.Literal.number(1)) 6872 args.append(self._parse_bitwise()) 6873 6874 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6875 6876 def _parse_trim(self) -> exp.Trim: 6877 # https://www.w3resource.com/sql/character-functions/trim.php 6878 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6879 6880 position = None 6881 collation = None 6882 expression = None 6883 6884 if self._match_texts(self.TRIM_TYPES): 6885 position = self._prev.text.upper() 6886 6887 this = self._parse_bitwise() 6888 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6889 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6890 expression = self._parse_bitwise() 6891 6892 if invert_order: 6893 this, expression = expression, this 6894 6895 if self._match(TokenType.COLLATE): 6896 collation = self._parse_bitwise() 6897 6898 return self.expression( 6899 exp.Trim, this=this, position=position, expression=expression, collation=collation 6900 ) 6901 6902 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6903 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6904 6905 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6906 return self._parse_window(self._parse_id_var(), alias=True) 6907 6908 def _parse_respect_or_ignore_nulls( 6909 self, this: t.Optional[exp.Expression] 6910 ) -> t.Optional[exp.Expression]: 6911 if self._match_text_seq("IGNORE", "NULLS"): 

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # BigQuery allows named windows: SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
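
    # Illustrative sketch (not part of the original source): both IGNORE NULLS
    # placements discussed above should normalize to the same tree, so their
    # generated SQL should match (indicative; verify against your version):
    #
    #   import sqlglot
    #   a = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
    #   b = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
    #   a.sql() == b.sql()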

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and, if that fails, treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
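
    # Illustrative sketch (not part of the original source): implicit and explicit
    # aliases both yield exp.Alias (indicative):
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT 1 AS x").expressions[0]  # Alias(this=Literal(1), alias=x)
    #   sqlglot.parse_one("SELECT 1 x").expressions[0]     # same shape, without the AS token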

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
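
    # Note (not part of the original source): the combinators above compose; for
    # example, a parenthesized, comma-separated identifier list is just
    # _parse_wrapped_csv(self._parse_id_var), which is exactly what
    # _parse_wrapped_id_vars delegates to.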

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if not self._prev.text.upper() == "ADD":
            return None

        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition, exists=exists, this=self._parse_field(any_token=True)
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def)

        return self._parse_csv(_parse_add_alteration)
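
    # Illustrative sketch (not part of the original source): an ADD COLUMN with a
    # position clause, as parsed above (indicative):
    #
    #   import sqlglot
    #   alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT AFTER b", read="mysql")
    #   col = alter.args["actions"][0]  # exp.ColumnDef carrying an exp.ColumnPosition "AFTER"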

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
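
    # Illustrative sketch (not part of the original source): the two RENAME forms
    # disambiguated above (indicative):
    #
    #   import sqlglot
    #   sqlglot.parse_one("ALTER TABLE t RENAME COLUMN a TO b")  # action: exp.RenameColumn
    #   sqlglot.parse_one("ALTER TABLE t RENAME TO t2")          # action: exp.AlterRename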

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)
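
    # Illustrative sketch (not part of the original source): supported ALTER forms
    # produce a structured exp.Alter; anything else falls back to an opaque
    # exp.Command so the statement still round-trips (indicative):
    #
    #   import sqlglot
    #   isinstance(sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT"), sqlglot.exp.Alter)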

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expression keywords before falling back to parsing a table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind: https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)
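
    # Illustrative sketch (not part of the original source): Spark-style column
    # statistics flow through the methods above (indicative):
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS a, b", read="spark"
    #   )  # -> exp.Analyze wrapping an exp.AnalyzeStatistics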

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
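
    # Illustrative sketch (not part of the original source): a minimal MERGE
    # round trip (indicative):
    #
    #   import sqlglot
    #   merge = sqlglot.parse_one(
    #       "MERGE INTO t USING s ON t.id = s.id "
    #       "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #       "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   )
    #   len(merge.args["whens"].expressions)  # 2 exp.When branches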

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
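
    # Illustrative sketch (not part of the original source): SET statements that are
    # fully consumed become exp.Set; leftover tokens trigger the exp.Command
    # fallback (indicative):
    #
    #   import sqlglot
    #   isinstance(sqlglot.parse_one("SET search_path TO public", read="postgres"), sqlglot.exp.Set)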

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
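
    # Note (not part of the original source): the _match* helpers below share one
    # contract: peek at self._curr, advance on success unless advance=False, and
    # return a truthy value. _match_text_seq additionally retreats to its starting
    # index when the keyword sequence is incomplete, which keeps speculative
    # parsing with _retreat cheap and safe.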

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
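
    # Illustrative sketch (not part of the original source): _replace_lambda is what
    # rewires column references inside higher-order functions into lambda
    # parameters (indicative):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT FILTER(xs, x -> x > 0) FROM t", read="spark")
    #   ast.find(sqlglot.exp.Lambda)  # x -> x > 0, with `x` bound as a lambda argument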

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with the TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" or "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
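
    # Illustrative sketch (not part of the original source): a Snowflake-style COPY
    # with wrapped parameters, as assembled by the helpers above (indicative):
    #
    #   import sqlglot
    #   copy = sqlglot.parse_one(
    #       "COPY INTO t FROM @stage FILE_FORMAT = (TYPE = CSV)", read="snowflake"
    #   )
    #   isinstance(copy, sqlglot.exp.Copy)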

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until a comma (end of this privilege), ON
        # (end of the privilege list) or L_PAREN (start of a column list) is met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)
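
    # Illustrative sketch (not part of the original source): GRANT parsing
    # (indicative):
    #
    #   import sqlglot
    #   grant = sqlglot.parse_one("GRANT SELECT, INSERT ON TABLE t TO ROLE analyst")
    #   [p.this for p in grant.args["privileges"]]  # [Var(SELECT), Var(INSERT)]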

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable, e.g. MySQL allows names
        # such as "foo.*" and "*.*", which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)
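
    # Note (not part of the original source): the pipe-syntax operators below lean
    # on _build_pipe_cte, so each step that needs a fresh scope is lowered onto a
    # CTE named __tmp<N>; "FROM t |> AGGREGATE COUNT(*) AS c" ends up roughly as
    # WITH __tmp1 AS (SELECT COUNT(*) AS c FROM t) SELECT * FROM __tmp1
    # (shape is indicative and version-dependent).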

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc.) and the JOIN operator share a few common
                # starting keywords, which makes them tricky to disambiguate without
                # lookahead. The approach here is to try to parse a set operation and, if
                # that fails, to try to parse a join operator. If that fails as well, the
                # operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query
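
# Illustrative sketch (not part of the original source): Parser is usually driven
# through the high-level helpers such as sqlglot.parse_one, but it can also be
# fed tokenizer output directly (indicative):
#
#   import sqlglot
#   from sqlglot.parser import Parser
#
#   tokens = sqlglot.tokenize("SELECT a FROM t")
#   expressions = Parser().parse(tokens)  # [exp.Select]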
32def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 33 if len(args) == 1 and args[0].is_star: 34 return exp.StarMap(this=args[0]) 35 36 keys = [] 37 values = [] 38 for i in range(0, len(args), 2): 39 keys.append(args[i]) 40 values.append(args[i + 1]) 41 42 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
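build_var_map consumes a flat argument list in which keys and values alternate (this is how the VAR_MAP entry in Parser.FUNCTIONS below invokes it), while a lone star argument short-circuits to StarMap. A small sketch calling the builder directly:

    from sqlglot import exp
    from sqlglot.parser import build_var_map

    # Alternating key/value arguments become parallel key and value arrays.
    node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
    assert isinstance(node, exp.VarMap)

    # A single star argument is preserved as a StarMap.
    assert isinstance(build_var_map([exp.Star()]), exp.StarMap)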
50def binary_range_parser( 51 expr_type: t.Type[exp.Expression], reverse_args: bool = False 52) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 53 def _parse_binary_range( 54 self: Parser, this: t.Optional[exp.Expression] 55 ) -> t.Optional[exp.Expression]: 56 expression = self._parse_bitwise() 57 if reverse_args: 58 this, expression = expression, this 59 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 60 61 return _parse_binary_range
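binary_range_parser returns a closure with the (self, this) signature that Parser.RANGE_PARSERS expects; reverse_args serves operators whose operand order is mirrored relative to their counterpart (e.g. <@ versus @>). A sketch of registering one in a Parser subclass, mirroring the stock RANGE_PARSERS entries further down:

    from sqlglot import exp
    from sqlglot.parser import Parser, binary_range_parser
    from sqlglot.tokens import TokenType

    class CustomParser(Parser):
        RANGE_PARSERS = {
            **Parser.RANGE_PARSERS,
            # SIMILAR TO is already registered by default; shown for illustration.
            TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        }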
64def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 65 # Default argument order is base, expression 66 this = seq_get(args, 0) 67 expression = seq_get(args, 1) 68 69 if expression: 70 if not dialect.LOG_BASE_FIRST: 71 this, expression = expression, this 72 return exp.Log(this=this, expression=expression) 73 74 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
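Whether LOG(x, y) means "log base x of y" or the reverse is dialect-dependent (LOG_BASE_FIRST), and single-argument LOG may normalize to the natural logarithm when the dialect's parser sets LOG_DEFAULTS_TO_LN. A quick way to observe the normalization, assuming the default dialect's settings:

    import sqlglot
    from sqlglot import exp

    # Under LOG_BASE_FIRST, `this` holds the base and `expression` the operand.
    log = sqlglot.parse_one("SELECT LOG(2, 8)").find(exp.Log)
    assert log.this.to_py() == 2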
94def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 95 def _builder(args: t.List, dialect: Dialect) -> E: 96 expression = expr_type( 97 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 98 ) 99 if len(args) > 2 and expr_type is exp.JSONExtract: 100 expression.set("expressions", args[2:]) 101 102 return expression 103 104 return _builder
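The builder routes the second argument through dialect.to_json_path, so downstream generators see a structured JSON path node rather than a raw string; trailing path arguments are kept only for exp.JSONExtract. For example, under the default dialect:

    import sqlglot
    from sqlglot import exp

    node = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.a')").find(exp.JSONExtract)
    # The '$.a' literal has been normalized into a JSONPath expression.
    print(repr(node.expression))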
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
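The Paren wrap matters because exp.Mod is typically rendered with the % operator, which binds tighter than + and -; without it, MOD(a + 1, 7) would re-serialize as a + 1 % 7 and change meaning. A round trip should therefore keep the parentheses:

    import sqlglot

    # Expected output: SELECT (a + 1) % 7
    print(sqlglot.transpile("SELECT MOD(a + 1, 7)")[0])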
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
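The bracket_notation flag records whether the literal was spelled [1, 2] or ARRAY[1, 2]; it is only set for dialects whose HAS_DISTINCT_ARRAY_CONSTRUCTORS flag says the two forms differ. A direct-call sketch (the choice of DuckDB here is illustrative only):

    from sqlglot import exp
    from sqlglot.dialects import Dialect
    from sqlglot.parser import build_array_constructor
    from sqlglot.tokens import TokenType

    node = build_array_constructor(
        exp.Array, [exp.Literal.number(1)], TokenType.L_BRACKET, Dialect.get_or_raise("duckdb")
    )
    # node.args.get("bracket_notation") is populated only under that dialect flag.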
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
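With exactly two arguments, the source timezone falls back to the caller-supplied default (dialects pass their own); three or more arguments defer to ConvertTimezone.from_arg_list. A direct-call sketch:

    from sqlglot import exp
    from sqlglot.parser import build_convert_timezone

    node = build_convert_timezone(
        [exp.Literal.string("America/New_York"), exp.column("ts")],
        default_source_tz="UTC",
    )
    assert node.args["source_tz"].this == "UTC"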
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 
TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEQUENCE, 456 TokenType.SINK, 457 TokenType.SOURCE, 458 TokenType.STAGE, 459 TokenType.STORAGE_INTEGRATION, 460 TokenType.STREAMLIT, 461 TokenType.TABLE, 462 TokenType.TAG, 463 TokenType.VIEW, 464 TokenType.WAREHOUSE, 465 } 466 467 CREATABLES = { 468 TokenType.COLUMN, 469 TokenType.CONSTRAINT, 470 TokenType.FOREIGN_KEY, 471 TokenType.FUNCTION, 472 TokenType.INDEX, 473 TokenType.PROCEDURE, 474 *DB_CREATABLES, 475 } 476 477 ALTERABLES = { 478 TokenType.INDEX, 479 TokenType.TABLE, 480 TokenType.VIEW, 481 } 482 483 # Tokens that can represent identifiers 484 ID_VAR_TOKENS = { 485 TokenType.ALL, 486 TokenType.ATTACH, 487 TokenType.VAR, 488 TokenType.ANTI, 489 TokenType.APPLY, 490 TokenType.ASC, 491 TokenType.ASOF, 492 TokenType.AUTO_INCREMENT, 493 TokenType.BEGIN, 494 TokenType.BPCHAR, 495 TokenType.CACHE, 496 TokenType.CASE, 497 TokenType.COLLATE, 498 TokenType.COMMAND, 499 TokenType.COMMENT, 500 TokenType.COMMIT, 501 TokenType.CONSTRAINT, 502 TokenType.COPY, 503 TokenType.CUBE, 504 TokenType.CURRENT_SCHEMA, 505 TokenType.DEFAULT, 506 TokenType.DELETE, 507 TokenType.DESC, 508 TokenType.DESCRIBE, 509 TokenType.DETACH, 510 TokenType.DICTIONARY, 511 TokenType.DIV, 512 TokenType.END, 513 TokenType.EXECUTE, 514 TokenType.EXPORT, 515 TokenType.ESCAPE, 516 TokenType.FALSE, 517 TokenType.FIRST, 518 TokenType.FILTER, 519 TokenType.FINAL, 520 TokenType.FORMAT, 521 TokenType.FULL, 522 TokenType.GET, 523 TokenType.IDENTIFIER, 524 TokenType.IS, 525 TokenType.ISNULL, 526 TokenType.INTERVAL, 527 TokenType.KEEP, 528 TokenType.KILL, 529 TokenType.LEFT, 530 TokenType.LIMIT, 531 TokenType.LOAD, 532 TokenType.MERGE, 533 TokenType.NATURAL, 534 TokenType.NEXT, 535 TokenType.OFFSET, 536 TokenType.OPERATOR, 537 TokenType.ORDINALITY, 538 TokenType.OVERLAPS, 539 TokenType.OVERWRITE, 540 TokenType.PARTITION, 541 TokenType.PERCENT, 542 TokenType.PIVOT, 543 TokenType.PRAGMA, 544 TokenType.PUT, 545 TokenType.RANGE, 546 TokenType.RECURSIVE, 547 TokenType.REFERENCES, 548 TokenType.REFRESH, 549 TokenType.RENAME, 550 TokenType.REPLACE, 551 TokenType.RIGHT, 552 TokenType.ROLLUP, 553 TokenType.ROW, 554 TokenType.ROWS, 555 TokenType.SEMI, 556 TokenType.SET, 557 TokenType.SETTINGS, 558 TokenType.SHOW, 559 TokenType.TEMPORARY, 560 TokenType.TOP, 561 TokenType.TRUE, 562 TokenType.TRUNCATE, 563 TokenType.UNIQUE, 564 TokenType.UNNEST, 565 TokenType.UNPIVOT, 
566 TokenType.UPDATE, 567 TokenType.USE, 568 TokenType.VOLATILE, 569 TokenType.WINDOW, 570 *CREATABLES, 571 *SUBQUERY_PREDICATES, 572 *TYPE_TOKENS, 573 *NO_PAREN_FUNCTIONS, 574 } 575 ID_VAR_TOKENS.remove(TokenType.UNION) 576 577 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 578 TokenType.ANTI, 579 TokenType.APPLY, 580 TokenType.ASOF, 581 TokenType.FULL, 582 TokenType.LEFT, 583 TokenType.LOCK, 584 TokenType.NATURAL, 585 TokenType.RIGHT, 586 TokenType.SEMI, 587 TokenType.WINDOW, 588 } 589 590 ALIAS_TOKENS = ID_VAR_TOKENS 591 592 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 593 594 ARRAY_CONSTRUCTORS = { 595 "ARRAY": exp.Array, 596 "LIST": exp.List, 597 } 598 599 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 600 601 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 602 603 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 604 605 FUNC_TOKENS = { 606 TokenType.COLLATE, 607 TokenType.COMMAND, 608 TokenType.CURRENT_DATE, 609 TokenType.CURRENT_DATETIME, 610 TokenType.CURRENT_SCHEMA, 611 TokenType.CURRENT_TIMESTAMP, 612 TokenType.CURRENT_TIME, 613 TokenType.CURRENT_USER, 614 TokenType.FILTER, 615 TokenType.FIRST, 616 TokenType.FORMAT, 617 TokenType.GET, 618 TokenType.GLOB, 619 TokenType.IDENTIFIER, 620 TokenType.INDEX, 621 TokenType.ISNULL, 622 TokenType.ILIKE, 623 TokenType.INSERT, 624 TokenType.LIKE, 625 TokenType.MERGE, 626 TokenType.NEXT, 627 TokenType.OFFSET, 628 TokenType.PRIMARY_KEY, 629 TokenType.RANGE, 630 TokenType.REPLACE, 631 TokenType.RLIKE, 632 TokenType.ROW, 633 TokenType.UNNEST, 634 TokenType.VAR, 635 TokenType.LEFT, 636 TokenType.RIGHT, 637 TokenType.SEQUENCE, 638 TokenType.DATE, 639 TokenType.DATETIME, 640 TokenType.TABLE, 641 TokenType.TIMESTAMP, 642 TokenType.TIMESTAMPTZ, 643 TokenType.TRUNCATE, 644 TokenType.WINDOW, 645 TokenType.XOR, 646 *TYPE_TOKENS, 647 *SUBQUERY_PREDICATES, 648 } 649 650 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.AND: exp.And, 652 } 653 654 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.COLON_EQ: exp.PropertyEQ, 656 } 657 658 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 659 TokenType.OR: exp.Or, 660 } 661 662 EQUALITY = { 663 TokenType.EQ: exp.EQ, 664 TokenType.NEQ: exp.NEQ, 665 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 666 } 667 668 COMPARISON = { 669 TokenType.GT: exp.GT, 670 TokenType.GTE: exp.GTE, 671 TokenType.LT: exp.LT, 672 TokenType.LTE: exp.LTE, 673 } 674 675 BITWISE = { 676 TokenType.AMP: exp.BitwiseAnd, 677 TokenType.CARET: exp.BitwiseXor, 678 TokenType.PIPE: exp.BitwiseOr, 679 } 680 681 TERM = { 682 TokenType.DASH: exp.Sub, 683 TokenType.PLUS: exp.Add, 684 TokenType.MOD: exp.Mod, 685 TokenType.COLLATE: exp.Collate, 686 } 687 688 FACTOR = { 689 TokenType.DIV: exp.IntDiv, 690 TokenType.LR_ARROW: exp.Distance, 691 TokenType.SLASH: exp.Div, 692 TokenType.STAR: exp.Mul, 693 } 694 695 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 696 697 TIMES = { 698 TokenType.TIME, 699 TokenType.TIMETZ, 700 } 701 702 TIMESTAMPS = { 703 TokenType.TIMESTAMP, 704 TokenType.TIMESTAMPNTZ, 705 TokenType.TIMESTAMPTZ, 706 TokenType.TIMESTAMPLTZ, 707 *TIMES, 708 } 709 710 SET_OPERATIONS = { 711 TokenType.UNION, 712 TokenType.INTERSECT, 713 TokenType.EXCEPT, 714 } 715 716 JOIN_METHODS = { 717 TokenType.ASOF, 718 TokenType.NATURAL, 719 TokenType.POSITIONAL, 720 } 721 722 JOIN_SIDES = { 723 TokenType.LEFT, 724 TokenType.RIGHT, 725 TokenType.FULL, 726 } 727 728 JOIN_KINDS = { 729 TokenType.ANTI, 730 TokenType.CROSS, 731 TokenType.INNER, 732 TokenType.OUTER, 733 TokenType.SEMI, 734 
TokenType.STRAIGHT_JOIN, 735 } 736 737 JOIN_HINTS: t.Set[str] = set() 738 739 LAMBDAS = { 740 TokenType.ARROW: lambda self, expressions: self.expression( 741 exp.Lambda, 742 this=self._replace_lambda( 743 self._parse_assignment(), 744 expressions, 745 ), 746 expressions=expressions, 747 ), 748 TokenType.FARROW: lambda self, expressions: self.expression( 749 exp.Kwarg, 750 this=exp.var(expressions[0].name), 751 expression=self._parse_assignment(), 752 ), 753 } 754 755 COLUMN_OPERATORS = { 756 TokenType.DOT: None, 757 TokenType.DOTCOLON: lambda self, this, to: self.expression( 758 exp.JSONCast, 759 this=this, 760 to=to, 761 ), 762 TokenType.DCOLON: lambda self, this, to: self.expression( 763 exp.Cast if self.STRICT_CAST else exp.TryCast, 764 this=this, 765 to=to, 766 ), 767 TokenType.ARROW: lambda self, this, path: self.expression( 768 exp.JSONExtract, 769 this=this, 770 expression=self.dialect.to_json_path(path), 771 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 772 ), 773 TokenType.DARROW: lambda self, this, path: self.expression( 774 exp.JSONExtractScalar, 775 this=this, 776 expression=self.dialect.to_json_path(path), 777 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 778 ), 779 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtract, 781 this=this, 782 expression=path, 783 ), 784 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtractScalar, 786 this=this, 787 expression=path, 788 ), 789 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 790 exp.JSONBContains, 791 this=this, 792 expression=key, 793 ), 794 } 795 796 EXPRESSION_PARSERS = { 797 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 798 exp.Column: lambda self: self._parse_column(), 799 exp.Condition: lambda self: self._parse_assignment(), 800 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 801 exp.Expression: lambda self: self._parse_expression(), 802 exp.From: lambda self: self._parse_from(joins=True), 803 exp.Group: lambda self: self._parse_group(), 804 exp.Having: lambda self: self._parse_having(), 805 exp.Hint: lambda self: self._parse_hint_body(), 806 exp.Identifier: lambda self: self._parse_id_var(), 807 exp.Join: lambda self: self._parse_join(), 808 exp.Lambda: lambda self: self._parse_lambda(), 809 exp.Lateral: lambda self: self._parse_lateral(), 810 exp.Limit: lambda self: self._parse_limit(), 811 exp.Offset: lambda self: self._parse_offset(), 812 exp.Order: lambda self: self._parse_order(), 813 exp.Ordered: lambda self: self._parse_ordered(), 814 exp.Properties: lambda self: self._parse_properties(), 815 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 816 exp.Qualify: lambda self: self._parse_qualify(), 817 exp.Returning: lambda self: self._parse_returning(), 818 exp.Select: lambda self: self._parse_select(), 819 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 820 exp.Table: lambda self: self._parse_table_parts(), 821 exp.TableAlias: lambda self: self._parse_table_alias(), 822 exp.Tuple: lambda self: self._parse_value(values=False), 823 exp.Whens: lambda self: self._parse_when_matched(), 824 exp.Where: lambda self: self._parse_where(), 825 exp.Window: lambda self: self._parse_named_window(), 826 exp.With: lambda self: self._parse_with(), 827 "JOIN_TYPE": lambda self: self._parse_join_parts(), 828 } 829 830 STATEMENT_PARSERS = { 831 TokenType.ALTER: lambda self: self._parse_alter(), 832 TokenType.ANALYZE: lambda self: 
self._parse_analyze(), 833 TokenType.BEGIN: lambda self: self._parse_transaction(), 834 TokenType.CACHE: lambda self: self._parse_cache(), 835 TokenType.COMMENT: lambda self: self._parse_comment(), 836 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 837 TokenType.COPY: lambda self: self._parse_copy(), 838 TokenType.CREATE: lambda self: self._parse_create(), 839 TokenType.DELETE: lambda self: self._parse_delete(), 840 TokenType.DESC: lambda self: self._parse_describe(), 841 TokenType.DESCRIBE: lambda self: self._parse_describe(), 842 TokenType.DROP: lambda self: self._parse_drop(), 843 TokenType.GRANT: lambda self: self._parse_grant(), 844 TokenType.INSERT: lambda self: self._parse_insert(), 845 TokenType.KILL: lambda self: self._parse_kill(), 846 TokenType.LOAD: lambda self: self._parse_load(), 847 TokenType.MERGE: lambda self: self._parse_merge(), 848 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 849 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 850 TokenType.REFRESH: lambda self: self._parse_refresh(), 851 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 852 TokenType.SET: lambda self: self._parse_set(), 853 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 854 TokenType.UNCACHE: lambda self: self._parse_uncache(), 855 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 856 TokenType.UPDATE: lambda self: self._parse_update(), 857 TokenType.USE: lambda self: self._parse_use(), 858 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 859 } 860 861 UNARY_PARSERS = { 862 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 863 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 864 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 865 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 866 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 867 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 868 } 869 870 STRING_PARSERS = { 871 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 872 exp.RawString, this=token.text 873 ), 874 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 875 exp.National, this=token.text 876 ), 877 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 878 TokenType.STRING: lambda self, token: self.expression( 879 exp.Literal, this=token.text, is_string=True 880 ), 881 TokenType.UNICODE_STRING: lambda self, token: self.expression( 882 exp.UnicodeString, 883 this=token.text, 884 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 885 ), 886 } 887 888 NUMERIC_PARSERS = { 889 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 890 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 891 TokenType.HEX_STRING: lambda self, token: self.expression( 892 exp.HexString, 893 this=token.text, 894 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 895 ), 896 TokenType.NUMBER: lambda self, token: self.expression( 897 exp.Literal, this=token.text, is_string=False 898 ), 899 } 900 901 PRIMARY_PARSERS = { 902 **STRING_PARSERS, 903 **NUMERIC_PARSERS, 904 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 905 TokenType.NULL: lambda self, _: self.expression(exp.Null), 906 
TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 907 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 908 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 909 TokenType.STAR: lambda self, _: self._parse_star_ops(), 910 } 911 912 PLACEHOLDER_PARSERS = { 913 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 914 TokenType.PARAMETER: lambda self: self._parse_parameter(), 915 TokenType.COLON: lambda self: ( 916 self.expression(exp.Placeholder, this=self._prev.text) 917 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 918 else None 919 ), 920 } 921 922 RANGE_PARSERS = { 923 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 924 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 925 TokenType.GLOB: binary_range_parser(exp.Glob), 926 TokenType.ILIKE: binary_range_parser(exp.ILike), 927 TokenType.IN: lambda self, this: self._parse_in(this), 928 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 929 TokenType.IS: lambda self, this: self._parse_is(this), 930 TokenType.LIKE: binary_range_parser(exp.Like), 931 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 932 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 933 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 934 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 935 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 936 } 937 938 PIPE_SYNTAX_TRANSFORM_PARSERS = { 939 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 940 "AS": lambda self, query: self._build_pipe_cte( 941 query, [exp.Star()], self._parse_table_alias() 942 ), 943 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 944 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 945 "ORDER BY": lambda self, query: query.order_by( 946 self._parse_order(), append=False, copy=False 947 ), 948 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 949 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 950 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 951 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 952 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 953 } 954 955 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 956 "ALLOWED_VALUES": lambda self: self.expression( 957 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 958 ), 959 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 960 "AUTO": lambda self: self._parse_auto_property(), 961 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 962 "BACKUP": lambda self: self.expression( 963 exp.BackupProperty, this=self._parse_var(any_token=True) 964 ), 965 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 966 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 967 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 968 "CHECKSUM": lambda self: self._parse_checksum(), 969 "CLUSTER BY": lambda self: self._parse_cluster(), 970 "CLUSTERED": lambda self: self._parse_clustered_by(), 971 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 972 exp.CollateProperty, **kwargs 973 ), 974 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 975 "CONTAINS": lambda self: self._parse_contains_property(), 976 "COPY": 
lambda self: self._parse_copy_property(), 977 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 978 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 979 "DEFINER": lambda self: self._parse_definer(), 980 "DETERMINISTIC": lambda self: self.expression( 981 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 982 ), 983 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 984 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 985 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 986 "DISTKEY": lambda self: self._parse_distkey(), 987 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 988 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 989 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 990 "ENVIRONMENT": lambda self: self.expression( 991 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 992 ), 993 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 994 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 995 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 996 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 997 "FREESPACE": lambda self: self._parse_freespace(), 998 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 999 "HEAP": lambda self: self.expression(exp.HeapProperty), 1000 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1001 "IMMUTABLE": lambda self: self.expression( 1002 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1003 ), 1004 "INHERITS": lambda self: self.expression( 1005 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1006 ), 1007 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1008 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1009 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1010 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1011 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1012 "LIKE": lambda self: self._parse_create_like(), 1013 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1014 "LOCK": lambda self: self._parse_locking(), 1015 "LOCKING": lambda self: self._parse_locking(), 1016 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1017 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1018 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1019 "MODIFIES": lambda self: self._parse_modifies_property(), 1020 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1021 "NO": lambda self: self._parse_no_property(), 1022 "ON": lambda self: self._parse_on_property(), 1023 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1024 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1025 "PARTITION": lambda self: self._parse_partitioned_of(), 1026 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1027 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1028 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1029 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1030 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1031 "READS": lambda self: self._parse_reads_property(), 1032 
"REMOTE": lambda self: self._parse_remote_with_connection(), 1033 "RETURNS": lambda self: self._parse_returns(), 1034 "STRICT": lambda self: self.expression(exp.StrictProperty), 1035 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1036 "ROW": lambda self: self._parse_row(), 1037 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1038 "SAMPLE": lambda self: self.expression( 1039 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1040 ), 1041 "SECURE": lambda self: self.expression(exp.SecureProperty), 1042 "SECURITY": lambda self: self._parse_security(), 1043 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1044 "SETTINGS": lambda self: self._parse_settings_property(), 1045 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1046 "SORTKEY": lambda self: self._parse_sortkey(), 1047 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1048 "STABLE": lambda self: self.expression( 1049 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1050 ), 1051 "STORED": lambda self: self._parse_stored(), 1052 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1053 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1054 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1055 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1056 "TO": lambda self: self._parse_to_table(), 1057 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1058 "TRANSFORM": lambda self: self.expression( 1059 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1060 ), 1061 "TTL": lambda self: self._parse_ttl(), 1062 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1063 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1064 "VOLATILE": lambda self: self._parse_volatile_property(), 1065 "WITH": lambda self: self._parse_with_property(), 1066 } 1067 1068 CONSTRAINT_PARSERS = { 1069 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1070 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1071 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1072 "CHARACTER SET": lambda self: self.expression( 1073 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1074 ), 1075 "CHECK": lambda self: self.expression( 1076 exp.CheckColumnConstraint, 1077 this=self._parse_wrapped(self._parse_assignment), 1078 enforced=self._match_text_seq("ENFORCED"), 1079 ), 1080 "COLLATE": lambda self: self.expression( 1081 exp.CollateColumnConstraint, 1082 this=self._parse_identifier() or self._parse_column(), 1083 ), 1084 "COMMENT": lambda self: self.expression( 1085 exp.CommentColumnConstraint, this=self._parse_string() 1086 ), 1087 "COMPRESS": lambda self: self._parse_compress(), 1088 "CLUSTERED": lambda self: self.expression( 1089 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1090 ), 1091 "NONCLUSTERED": lambda self: self.expression( 1092 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1093 ), 1094 "DEFAULT": lambda self: self.expression( 1095 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1096 ), 1097 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1098 "EPHEMERAL": lambda self: self.expression( 1099 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1100 ), 1101 
"EXCLUDE": lambda self: self.expression( 1102 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1103 ), 1104 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1105 "FORMAT": lambda self: self.expression( 1106 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1107 ), 1108 "GENERATED": lambda self: self._parse_generated_as_identity(), 1109 "IDENTITY": lambda self: self._parse_auto_increment(), 1110 "INLINE": lambda self: self._parse_inline(), 1111 "LIKE": lambda self: self._parse_create_like(), 1112 "NOT": lambda self: self._parse_not_constraint(), 1113 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1114 "ON": lambda self: ( 1115 self._match(TokenType.UPDATE) 1116 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1117 ) 1118 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1119 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1120 "PERIOD": lambda self: self._parse_period_for_system_time(), 1121 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1122 "REFERENCES": lambda self: self._parse_references(match=False), 1123 "TITLE": lambda self: self.expression( 1124 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1125 ), 1126 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1127 "UNIQUE": lambda self: self._parse_unique(), 1128 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1129 "WATERMARK": lambda self: self.expression( 1130 exp.WatermarkColumnConstraint, 1131 this=self._match(TokenType.FOR) and self._parse_column(), 1132 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1133 ), 1134 "WITH": lambda self: self.expression( 1135 exp.Properties, expressions=self._parse_wrapped_properties() 1136 ), 1137 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1138 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1139 } 1140 1141 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1142 klass = ( 1143 exp.PartitionedByBucket 1144 if self._prev.text.upper() == "BUCKET" 1145 else exp.PartitionByTruncate 1146 ) 1147 1148 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1149 this, expression = seq_get(args, 0), seq_get(args, 1) 1150 1151 if isinstance(this, exp.Literal): 1152 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1153 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1154 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1155 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1156 # 1157 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1158 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1159 this, expression = expression, this 1160 1161 return self.expression(klass, this=this, expression=expression) 1162 1163 ALTER_PARSERS = { 1164 "ADD": lambda self: self._parse_alter_table_add(), 1165 "AS": lambda self: self._parse_select(), 1166 "ALTER": lambda self: self._parse_alter_table_alter(), 1167 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1168 "DELETE": lambda self: self.expression(exp.Delete, 
where=self._parse_where()), 1169 "DROP": lambda self: self._parse_alter_table_drop(), 1170 "RENAME": lambda self: self._parse_alter_table_rename(), 1171 "SET": lambda self: self._parse_alter_table_set(), 1172 "SWAP": lambda self: self.expression( 1173 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1174 ), 1175 } 1176 1177 ALTER_ALTER_PARSERS = { 1178 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1179 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1180 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1181 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1182 } 1183 1184 SCHEMA_UNNAMED_CONSTRAINTS = { 1185 "CHECK", 1186 "EXCLUDE", 1187 "FOREIGN KEY", 1188 "LIKE", 1189 "PERIOD", 1190 "PRIMARY KEY", 1191 "UNIQUE", 1192 "WATERMARK", 1193 "BUCKET", 1194 "TRUNCATE", 1195 } 1196 1197 NO_PAREN_FUNCTION_PARSERS = { 1198 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1199 "CASE": lambda self: self._parse_case(), 1200 "CONNECT_BY_ROOT": lambda self: self.expression( 1201 exp.ConnectByRoot, this=self._parse_column() 1202 ), 1203 "IF": lambda self: self._parse_if(), 1204 } 1205 1206 INVALID_FUNC_NAME_TOKENS = { 1207 TokenType.IDENTIFIER, 1208 TokenType.STRING, 1209 } 1210 1211 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1212 1213 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1214 1215 FUNCTION_PARSERS = { 1216 **{ 1217 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1218 }, 1219 **{ 1220 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1221 }, 1222 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1223 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1224 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1225 "DECODE": lambda self: self._parse_decode(), 1226 "EXTRACT": lambda self: self._parse_extract(), 1227 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1228 "GAP_FILL": lambda self: self._parse_gap_fill(), 1229 "JSON_OBJECT": lambda self: self._parse_json_object(), 1230 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1231 "JSON_TABLE": lambda self: self._parse_json_table(), 1232 "MATCH": lambda self: self._parse_match_against(), 1233 "NORMALIZE": lambda self: self._parse_normalize(), 1234 "OPENJSON": lambda self: self._parse_open_json(), 1235 "OVERLAY": lambda self: self._parse_overlay(), 1236 "POSITION": lambda self: self._parse_position(), 1237 "PREDICT": lambda self: self._parse_predict(), 1238 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1239 "STRING_AGG": lambda self: self._parse_string_agg(), 1240 "SUBSTRING": lambda self: self._parse_substring(), 1241 "TRIM": lambda self: self._parse_trim(), 1242 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1243 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1244 "XMLELEMENT": lambda self: self.expression( 1245 exp.XMLElement, 1246 this=self._match_text_seq("NAME") and self._parse_id_var(), 1247 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1248 ), 1249 "XMLTABLE": lambda self: self._parse_xml_table(), 1250 } 1251 1252 QUERY_MODIFIER_PARSERS = { 1253 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1254 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1255 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1256 TokenType.GROUP_BY: lambda self: 
("group", self._parse_group()), 1257 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1258 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1259 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1260 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1261 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1262 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1263 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1264 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1265 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1266 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1267 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1268 TokenType.CLUSTER_BY: lambda self: ( 1269 "cluster", 1270 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1271 ), 1272 TokenType.DISTRIBUTE_BY: lambda self: ( 1273 "distribute", 1274 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1275 ), 1276 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1277 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1278 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1279 } 1280 1281 SET_PARSERS = { 1282 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1283 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1284 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1285 "TRANSACTION": lambda self: self._parse_set_transaction(), 1286 } 1287 1288 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1289 1290 TYPE_LITERAL_PARSERS = { 1291 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1292 } 1293 1294 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1295 1296 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1297 1298 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1299 1300 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1301 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1302 "ISOLATION": ( 1303 ("LEVEL", "REPEATABLE", "READ"), 1304 ("LEVEL", "READ", "COMMITTED"), 1305 ("LEVEL", "READ", "UNCOMITTED"), 1306 ("LEVEL", "SERIALIZABLE"), 1307 ), 1308 "READ": ("WRITE", "ONLY"), 1309 } 1310 1311 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1312 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1313 ) 1314 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1315 1316 CREATE_SEQUENCE: OPTIONS_TYPE = { 1317 "SCALE": ("EXTEND", "NOEXTEND"), 1318 "SHARD": ("EXTEND", "NOEXTEND"), 1319 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1320 **dict.fromkeys( 1321 ( 1322 "SESSION", 1323 "GLOBAL", 1324 "KEEP", 1325 "NOKEEP", 1326 "ORDER", 1327 "NOORDER", 1328 "NOCACHE", 1329 "CYCLE", 1330 "NOCYCLE", 1331 "NOMINVALUE", 1332 "NOMAXVALUE", 1333 "NOSCALE", 1334 "NOSHARD", 1335 ), 1336 tuple(), 1337 ), 1338 } 1339 1340 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1341 1342 USABLES: OPTIONS_TYPE = dict.fromkeys( 1343 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1344 ) 1345 1346 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1347 1348 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1349 "TYPE": ("EVOLUTION",), 1350 **dict.fromkeys(("BINDING", "COMPENSATION", 
"EVOLUTION"), tuple()), 1351 } 1352 1353 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1354 1355 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1356 1357 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1358 "NOT": ("ENFORCED",), 1359 "MATCH": ( 1360 "FULL", 1361 "PARTIAL", 1362 "SIMPLE", 1363 ), 1364 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1365 "USING": ( 1366 "BTREE", 1367 "HASH", 1368 ), 1369 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1370 } 1371 1372 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1373 "NO": ("OTHERS",), 1374 "CURRENT": ("ROW",), 1375 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1376 } 1377 1378 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1379 1380 CLONE_KEYWORDS = {"CLONE", "COPY"} 1381 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1382 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1383 1384 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1385 1386 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1387 1388 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1389 1390 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1391 1392 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1393 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1394 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1395 1396 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1397 1398 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1399 1400 ADD_CONSTRAINT_TOKENS = { 1401 TokenType.CONSTRAINT, 1402 TokenType.FOREIGN_KEY, 1403 TokenType.INDEX, 1404 TokenType.KEY, 1405 TokenType.PRIMARY_KEY, 1406 TokenType.UNIQUE, 1407 } 1408 1409 DISTINCT_TOKENS = {TokenType.DISTINCT} 1410 1411 NULL_TOKENS = {TokenType.NULL} 1412 1413 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1414 1415 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1416 1417 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1418 1419 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1420 1421 ODBC_DATETIME_LITERALS = { 1422 "d": exp.Date, 1423 "t": exp.Time, 1424 "ts": exp.Timestamp, 1425 } 1426 1427 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1428 1429 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1430 1431 # The style options for the DESCRIBE statement 1432 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1433 1434 # The style options for the ANALYZE statement 1435 ANALYZE_STYLES = { 1436 "BUFFER_USAGE_LIMIT", 1437 "FULL", 1438 "LOCAL", 1439 "NO_WRITE_TO_BINLOG", 1440 "SAMPLE", 1441 "SKIP_LOCKED", 1442 "VERBOSE", 1443 } 1444 1445 ANALYZE_EXPRESSION_PARSERS = { 1446 "ALL": lambda self: self._parse_analyze_columns(), 1447 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1448 "DELETE": lambda self: self._parse_analyze_delete(), 1449 "DROP": lambda self: self._parse_analyze_histogram(), 1450 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1451 "LIST": lambda self: self._parse_analyze_list(), 1452 "PREDICATE": lambda self: self._parse_analyze_columns(), 1453 "UPDATE": lambda self: self._parse_analyze_histogram(), 1454 "VALIDATE": lambda self: self._parse_analyze_validate(), 1455 } 1456 1457 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1458 1459 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1460 1461 OPERATION_MODIFIERS: 
t.Set[str] = set() 1462 1463 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1464 1465 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1466 1467 STRICT_CAST = True 1468 1469 PREFIXED_PIVOT_COLUMNS = False 1470 IDENTIFY_PIVOT_STRINGS = False 1471 1472 LOG_DEFAULTS_TO_LN = False 1473 1474 # Whether the table sample clause expects CSV syntax 1475 TABLESAMPLE_CSV = False 1476 1477 # The default method used for table sampling 1478 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1479 1480 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1481 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1482 1483 # Whether the TRIM function expects the characters to trim as its first argument 1484 TRIM_PATTERN_FIRST = False 1485 1486 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1487 STRING_ALIASES = False 1488 1489 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1490 MODIFIERS_ATTACHED_TO_SET_OP = True 1491 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1492 1493 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1494 NO_PAREN_IF_COMMANDS = True 1495 1496 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1497 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1498 1499 # Whether the `:` operator is used to extract a value from a VARIANT column 1500 COLON_IS_VARIANT_EXTRACT = False 1501 1502 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1503 # If this is True and '(' is not found, the keyword will be treated as an identifier 1504 VALUES_FOLLOWED_BY_PAREN = True 1505 1506 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1507 SUPPORTS_IMPLICIT_UNNEST = False 1508 1509 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1510 INTERVAL_SPANS = True 1511 1512 # Whether a PARTITION clause can follow a table reference 1513 SUPPORTS_PARTITION_SELECTION = False 1514 1515 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1516 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1517 1518 # Whether the 'AS' keyword is optional in the CTE definition syntax 1519 OPTIONAL_ALIAS_TOKEN_CTE = True 1520 1521 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1522 ALTER_RENAME_REQUIRES_COLUMN = True 1523 1524 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1525 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1526 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1527 # as BigQuery, where all joins have the same precedence. 
1528 JOINS_HAVE_EQUAL_PRECEDENCE = False 1529 1530 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1531 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1532 1533 __slots__ = ( 1534 "error_level", 1535 "error_message_context", 1536 "max_errors", 1537 "dialect", 1538 "sql", 1539 "errors", 1540 "_tokens", 1541 "_index", 1542 "_curr", 1543 "_next", 1544 "_prev", 1545 "_prev_comments", 1546 "_pipe_cte_counter", 1547 ) 1548 1549 # Autofilled 1550 SHOW_TRIE: t.Dict = {} 1551 SET_TRIE: t.Dict = {} 1552 1553 def __init__( 1554 self, 1555 error_level: t.Optional[ErrorLevel] = None, 1556 error_message_context: int = 100, 1557 max_errors: int = 3, 1558 dialect: DialectType = None, 1559 ): 1560 from sqlglot.dialects import Dialect 1561 1562 self.error_level = error_level or ErrorLevel.IMMEDIATE 1563 self.error_message_context = error_message_context 1564 self.max_errors = max_errors 1565 self.dialect = Dialect.get_or_raise(dialect) 1566 self.reset() 1567 1568 def reset(self): 1569 self.sql = "" 1570 self.errors = [] 1571 self._tokens = [] 1572 self._index = 0 1573 self._curr = None 1574 self._next = None 1575 self._prev = None 1576 self._prev_comments = None 1577 self._pipe_cte_counter = 0 1578 1579 def parse( 1580 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1581 ) -> t.List[t.Optional[exp.Expression]]: 1582 """ 1583 Parses a list of tokens and returns a list of syntax trees, one tree 1584 per parsed SQL statement. 1585 1586 Args: 1587 raw_tokens: The list of tokens. 1588 sql: The original SQL string, used to produce helpful debug messages. 1589 1590 Returns: 1591 The list of the produced syntax trees. 1592 """ 1593 return self._parse( 1594 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1595 ) 1596 1597 def parse_into( 1598 self, 1599 expression_types: exp.IntoType, 1600 raw_tokens: t.List[Token], 1601 sql: t.Optional[str] = None, 1602 ) -> t.List[t.Optional[exp.Expression]]: 1603 """ 1604 Parses a list of tokens into a given Expression type. If a collection of Expression 1605 types is given instead, this method will try to parse the token list into each one 1606 of them, stopping at the first for which the parsing succeeds. 1607 1608 Args: 1609 expression_types: The expression type(s) to try and parse the token list into. 1610 raw_tokens: The list of tokens. 1611 sql: The original SQL string, used to produce helpful debug messages. 1612 1613 Returns: 1614 The target Expression. 
1615 """ 1616 errors = [] 1617 for expression_type in ensure_list(expression_types): 1618 parser = self.EXPRESSION_PARSERS.get(expression_type) 1619 if not parser: 1620 raise TypeError(f"No parser registered for {expression_type}") 1621 1622 try: 1623 return self._parse(parser, raw_tokens, sql) 1624 except ParseError as e: 1625 e.errors[0]["into_expression"] = expression_type 1626 errors.append(e) 1627 1628 raise ParseError( 1629 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1630 errors=merge_errors(errors), 1631 ) from errors[-1] 1632 1633 def _parse( 1634 self, 1635 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1636 raw_tokens: t.List[Token], 1637 sql: t.Optional[str] = None, 1638 ) -> t.List[t.Optional[exp.Expression]]: 1639 self.reset() 1640 self.sql = sql or "" 1641 1642 total = len(raw_tokens) 1643 chunks: t.List[t.List[Token]] = [[]] 1644 1645 for i, token in enumerate(raw_tokens): 1646 if token.token_type == TokenType.SEMICOLON: 1647 if token.comments: 1648 chunks.append([token]) 1649 1650 if i < total - 1: 1651 chunks.append([]) 1652 else: 1653 chunks[-1].append(token) 1654 1655 expressions = [] 1656 1657 for tokens in chunks: 1658 self._index = -1 1659 self._tokens = tokens 1660 self._advance() 1661 1662 expressions.append(parse_method(self)) 1663 1664 if self._index < len(self._tokens): 1665 self.raise_error("Invalid expression / Unexpected token") 1666 1667 self.check_errors() 1668 1669 return expressions 1670 1671 def check_errors(self) -> None: 1672 """Logs or raises any found errors, depending on the chosen error level setting.""" 1673 if self.error_level == ErrorLevel.WARN: 1674 for error in self.errors: 1675 logger.error(str(error)) 1676 elif self.error_level == ErrorLevel.RAISE and self.errors: 1677 raise ParseError( 1678 concat_messages(self.errors, self.max_errors), 1679 errors=merge_errors(self.errors), 1680 ) 1681 1682 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1683 """ 1684 Appends an error in the list of recorded errors or raises it, depending on the chosen 1685 error level setting. 1686 """ 1687 token = token or self._curr or self._prev or Token.string("") 1688 start = token.start 1689 end = token.end + 1 1690 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1691 highlight = self.sql[start:end] 1692 end_context = self.sql[end : end + self.error_message_context] 1693 1694 error = ParseError.new( 1695 f"{message}. Line {token.line}, Col: {token.col}.\n" 1696 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1697 description=message, 1698 line=token.line, 1699 col=token.col, 1700 start_context=start_context, 1701 highlight=highlight, 1702 end_context=end_context, 1703 ) 1704 1705 if self.error_level == ErrorLevel.IMMEDIATE: 1706 raise error 1707 1708 self.errors.append(error) 1709 1710 def expression( 1711 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1712 ) -> E: 1713 """ 1714 Creates a new, validated Expression. 1715 1716 Args: 1717 exp_class: The expression class to instantiate. 1718 comments: An optional list of comments to attach to the expression. 1719 kwargs: The arguments to set for the expression along with their respective values. 1720 1721 Returns: 1722 The target expression. 
1723 """ 1724 instance = exp_class(**kwargs) 1725 instance.add_comments(comments) if comments else self._add_comments(instance) 1726 return self.validate_expression(instance) 1727 1728 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1729 if expression and self._prev_comments: 1730 expression.add_comments(self._prev_comments) 1731 self._prev_comments = None 1732 1733 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1734 """ 1735 Validates an Expression, making sure that all its mandatory arguments are set. 1736 1737 Args: 1738 expression: The expression to validate. 1739 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1740 1741 Returns: 1742 The validated expression. 1743 """ 1744 if self.error_level != ErrorLevel.IGNORE: 1745 for error_message in expression.error_messages(args): 1746 self.raise_error(error_message) 1747 1748 return expression 1749 1750 def _find_sql(self, start: Token, end: Token) -> str: 1751 return self.sql[start.start : end.end + 1] 1752 1753 def _is_connected(self) -> bool: 1754 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1755 1756 def _advance(self, times: int = 1) -> None: 1757 self._index += times 1758 self._curr = seq_get(self._tokens, self._index) 1759 self._next = seq_get(self._tokens, self._index + 1) 1760 1761 if self._index > 0: 1762 self._prev = self._tokens[self._index - 1] 1763 self._prev_comments = self._prev.comments 1764 else: 1765 self._prev = None 1766 self._prev_comments = None 1767 1768 def _retreat(self, index: int) -> None: 1769 if index != self._index: 1770 self._advance(index - self._index) 1771 1772 def _warn_unsupported(self) -> None: 1773 if len(self._tokens) <= 1: 1774 return 1775 1776 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1777 # interested in emitting a warning for the one being currently processed. 1778 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1779 1780 logger.warning( 1781 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1782 ) 1783 1784 def _parse_command(self) -> exp.Command: 1785 self._warn_unsupported() 1786 return self.expression( 1787 exp.Command, 1788 comments=self._prev_comments, 1789 this=self._prev.text.upper(), 1790 expression=self._parse_string(), 1791 ) 1792 1793 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1794 """ 1795 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1796 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1797 solve this by setting & resetting the parser state accordingly.
1798 """
1799 index = self._index
1800 error_level = self.error_level
1801
1802 self.error_level = ErrorLevel.IMMEDIATE
1803 try:
1804 this = parse_method()
1805 except ParseError:
1806 this = None
1807 finally:
1808 if not this or retreat:
1809 self._retreat(index)
1810 self.error_level = error_level
1811
1812 return this
1813
1814 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1815 start = self._prev
1816 exists = self._parse_exists() if allow_exists else None
1817
1818 self._match(TokenType.ON)
1819
1820 materialized = self._match_text_seq("MATERIALIZED")
1821 kind = self._match_set(self.CREATABLES) and self._prev
1822 if not kind:
1823 return self._parse_as_command(start)
1824
1825 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1826 this = self._parse_user_defined_function(kind=kind.token_type)
1827 elif kind.token_type == TokenType.TABLE:
1828 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1829 elif kind.token_type == TokenType.COLUMN:
1830 this = self._parse_column()
1831 else:
1832 this = self._parse_id_var()
1833
1834 self._match(TokenType.IS)
1835
1836 return self.expression(
1837 exp.Comment,
1838 this=this,
1839 kind=kind.text,
1840 expression=self._parse_string(),
1841 exists=exists,
1842 materialized=materialized,
1843 )
1844
1845 def _parse_to_table(
1846 self,
1847 ) -> exp.ToTableProperty:
1848 table = self._parse_table_parts(schema=True)
1849 return self.expression(exp.ToTableProperty, this=table)
1850
1851 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1852 def _parse_ttl(self) -> exp.Expression:
1853 def _parse_ttl_action() -> t.Optional[exp.Expression]:
1854 this = self._parse_bitwise()
1855
1856 if self._match_text_seq("DELETE"):
1857 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1858 if self._match_text_seq("RECOMPRESS"):
1859 return self.expression(
1860 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1861 )
1862 if self._match_text_seq("TO", "DISK"):
1863 return self.expression(
1864 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1865 )
1866 if self._match_text_seq("TO", "VOLUME"):
1867 return self.expression(
1868 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1869 )
1870
1871 return this
1872
1873 expressions = self._parse_csv(_parse_ttl_action)
1874 where = self._parse_where()
1875 group = self._parse_group()
1876
1877 aggregates = None
1878 if group and self._match(TokenType.SET):
1879 aggregates = self._parse_csv(self._parse_set_item)
1880
1881 return self.expression(
1882 exp.MergeTreeTTL,
1883 expressions=expressions,
1884 where=where,
1885 group=group,
1886 aggregates=aggregates,
1887 )
1888
1889 def _parse_statement(self) -> t.Optional[exp.Expression]:
1890 if self._curr is None:
1891 return None
1892
1893 if self._match_set(self.STATEMENT_PARSERS):
1894 comments = self._prev_comments
1895 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1896 stmt.add_comments(comments, prepend=True)
1897 return stmt
1898
1899 if self._match_set(self.dialect.tokenizer.COMMANDS):
1900 return self._parse_command()
1901
1902 expression = self._parse_expression()
1903 expression = self._parse_set_operations(expression) if expression else self._parse_select()
1904 return self._parse_query_modifiers(expression)
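# A sketch of the dispatch above, end to end (illustrative; uses the base dialect):
#
#     from sqlglot.dialects.dialect import Dialect
#
#     dialect = Dialect.get_or_raise(None)
#     sql = "DROP TABLE IF EXISTS t"
#     tokens = dialect.tokenize(sql)
#     # TokenType.DROP is registered in STATEMENT_PARSERS, so _parse_statement
#     # hands off to _parse_drop below
#     tree = dialect.parser().parse(tokens, sql)[0]
#     assert tree.sql() == "DROP TABLE IF EXISTS t"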
1905 1906 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1907 start = self._prev 1908 temporary = self._match(TokenType.TEMPORARY) 1909 materialized = self._match_text_seq("MATERIALIZED") 1910 1911 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1912 if not kind: 1913 return self._parse_as_command(start) 1914 1915 concurrently = self._match_text_seq("CONCURRENTLY") 1916 if_exists = exists or self._parse_exists() 1917 1918 if kind == "COLUMN": 1919 this = self._parse_column() 1920 else: 1921 this = self._parse_table_parts( 1922 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1923 ) 1924 1925 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1926 1927 if self._match(TokenType.L_PAREN, advance=False): 1928 expressions = self._parse_wrapped_csv(self._parse_types) 1929 else: 1930 expressions = None 1931 1932 return self.expression( 1933 exp.Drop, 1934 exists=if_exists, 1935 this=this, 1936 expressions=expressions, 1937 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1938 temporary=temporary, 1939 materialized=materialized, 1940 cascade=self._match_text_seq("CASCADE"), 1941 constraints=self._match_text_seq("CONSTRAINTS"), 1942 purge=self._match_text_seq("PURGE"), 1943 cluster=cluster, 1944 concurrently=concurrently, 1945 ) 1946 1947 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1948 return ( 1949 self._match_text_seq("IF") 1950 and (not not_ or self._match(TokenType.NOT)) 1951 and self._match(TokenType.EXISTS) 1952 ) 1953 1954 def _parse_create(self) -> exp.Create | exp.Command: 1955 # Note: this can't be None because we've matched a statement parser 1956 start = self._prev 1957 1958 replace = ( 1959 start.token_type == TokenType.REPLACE 1960 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1961 or self._match_pair(TokenType.OR, TokenType.ALTER) 1962 ) 1963 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1964 1965 unique = self._match(TokenType.UNIQUE) 1966 1967 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1968 clustered = True 1969 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1970 "COLUMNSTORE" 1971 ): 1972 clustered = False 1973 else: 1974 clustered = None 1975 1976 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1977 self._advance() 1978 1979 properties = None 1980 create_token = self._match_set(self.CREATABLES) and self._prev 1981 1982 if not create_token: 1983 # exp.Properties.Location.POST_CREATE 1984 properties = self._parse_properties() 1985 create_token = self._match_set(self.CREATABLES) and self._prev 1986 1987 if not properties or not create_token: 1988 return self._parse_as_command(start) 1989 1990 concurrently = self._match_text_seq("CONCURRENTLY") 1991 exists = self._parse_exists(not_=True) 1992 this = None 1993 expression: t.Optional[exp.Expression] = None 1994 indexes = None 1995 no_schema_binding = None 1996 begin = None 1997 end = None 1998 clone = None 1999 2000 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2001 nonlocal properties 2002 if properties and temp_props: 2003 properties.expressions.extend(temp_props.expressions) 2004 elif temp_props: 2005 properties = temp_props 2006 2007 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2008 this = self._parse_user_defined_function(kind=create_token.token_type) 2009 2010 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2011 
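# (illustrative: in "CREATE FUNCTION f() RETURNS INT LANGUAGE SQL AS 'SELECT 1'", the
# RETURNS and LANGUAGE clauses are collected here as properties before the AS body is parsed)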
extend_props(self._parse_properties())
2012
2013 expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
2014 extend_props(self._parse_properties())
2015
2016 if not expression:
2017 if self._match(TokenType.COMMAND):
2018 expression = self._parse_as_command(self._prev)
2019 else:
2020 begin = self._match(TokenType.BEGIN)
2021 return_ = self._match_text_seq("RETURN")
2022
2023 if self._match(TokenType.STRING, advance=False):
2024 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
2025 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
2026 expression = self._parse_string()
2027 extend_props(self._parse_properties())
2028 else:
2029 expression = self._parse_user_defined_function_expression()
2030
2031 end = self._match_text_seq("END")
2032
2033 if return_:
2034 expression = self.expression(exp.Return, this=expression)
2035 elif create_token.token_type == TokenType.INDEX:
2036 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
2037 if not self._match(TokenType.ON):
2038 index = self._parse_id_var()
2039 anonymous = False
2040 else:
2041 index = None
2042 anonymous = True
2043
2044 this = self._parse_index(index=index, anonymous=anonymous)
2045 elif create_token.token_type in self.DB_CREATABLES:
2046 table_parts = self._parse_table_parts(
2047 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
2048 )
2049
2050 # exp.Properties.Location.POST_NAME
2051 self._match(TokenType.COMMA)
2052 extend_props(self._parse_properties(before=True))
2053
2054 this = self._parse_schema(this=table_parts)
2055
2056 # exp.Properties.Location.POST_SCHEMA and POST_WITH
2057 extend_props(self._parse_properties())
2058
2059 has_alias = self._match(TokenType.ALIAS)
2060 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
2061 # exp.Properties.Location.POST_ALIAS
2062 extend_props(self._parse_properties())
2063
2064 if create_token.token_type == TokenType.SEQUENCE:
2065 expression = self._parse_types()
2066 extend_props(self._parse_properties())
2067 else:
2068 expression = self._parse_ddl_select()
2069
2070 # Some dialects also support using a table as an alias instead of a SELECT.
2071 # Here we fall back to this as an alternative.
2072 if not expression and has_alias: 2073 expression = self._try_parse(self._parse_table_parts) 2074 2075 if create_token.token_type == TokenType.TABLE: 2076 # exp.Properties.Location.POST_EXPRESSION 2077 extend_props(self._parse_properties()) 2078 2079 indexes = [] 2080 while True: 2081 index = self._parse_index() 2082 2083 # exp.Properties.Location.POST_INDEX 2084 extend_props(self._parse_properties()) 2085 if not index: 2086 break 2087 else: 2088 self._match(TokenType.COMMA) 2089 indexes.append(index) 2090 elif create_token.token_type == TokenType.VIEW: 2091 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2092 no_schema_binding = True 2093 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2094 extend_props(self._parse_properties()) 2095 2096 shallow = self._match_text_seq("SHALLOW") 2097 2098 if self._match_texts(self.CLONE_KEYWORDS): 2099 copy = self._prev.text.lower() == "copy" 2100 clone = self.expression( 2101 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2102 ) 2103 2104 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2105 return self._parse_as_command(start) 2106 2107 create_kind_text = create_token.text.upper() 2108 return self.expression( 2109 exp.Create, 2110 this=this, 2111 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2112 replace=replace, 2113 refresh=refresh, 2114 unique=unique, 2115 expression=expression, 2116 exists=exists, 2117 properties=properties, 2118 indexes=indexes, 2119 no_schema_binding=no_schema_binding, 2120 begin=begin, 2121 end=end, 2122 clone=clone, 2123 concurrently=concurrently, 2124 clustered=clustered, 2125 ) 2126 2127 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2128 seq = exp.SequenceProperties() 2129 2130 options = [] 2131 index = self._index 2132 2133 while self._curr: 2134 self._match(TokenType.COMMA) 2135 if self._match_text_seq("INCREMENT"): 2136 self._match_text_seq("BY") 2137 self._match_text_seq("=") 2138 seq.set("increment", self._parse_term()) 2139 elif self._match_text_seq("MINVALUE"): 2140 seq.set("minvalue", self._parse_term()) 2141 elif self._match_text_seq("MAXVALUE"): 2142 seq.set("maxvalue", self._parse_term()) 2143 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2144 self._match_text_seq("=") 2145 seq.set("start", self._parse_term()) 2146 elif self._match_text_seq("CACHE"): 2147 # T-SQL allows empty CACHE which is initialized dynamically 2148 seq.set("cache", self._parse_number() or True) 2149 elif self._match_text_seq("OWNED", "BY"): 2150 # "OWNED BY NONE" is the default 2151 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2152 else: 2153 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2154 if opt: 2155 options.append(opt) 2156 else: 2157 break 2158 2159 seq.set("options", options if options else None) 2160 return None if self._index == index else seq 2161 2162 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2163 # only used for teradata currently 2164 self._match(TokenType.COMMA) 2165 2166 kwargs = { 2167 "no": self._match_text_seq("NO"), 2168 "dual": self._match_text_seq("DUAL"), 2169 "before": self._match_text_seq("BEFORE"), 2170 "default": self._match_text_seq("DEFAULT"), 2171 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2172 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2173 "after": self._match_text_seq("AFTER"), 2174 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2175 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2176 } 2177 2178 if self._match_texts(self.PROPERTY_PARSERS): 2179 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2180 try: 2181 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2182 except TypeError: 2183 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2184 2185 return None 2186 2187 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2188 return self._parse_wrapped_csv(self._parse_property) 2189 2190 def _parse_property(self) -> t.Optional[exp.Expression]: 2191 if self._match_texts(self.PROPERTY_PARSERS): 2192 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2193 2194 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2195 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2196 2197 if self._match_text_seq("COMPOUND", "SORTKEY"): 2198 return self._parse_sortkey(compound=True) 2199 2200 if self._match_text_seq("SQL", "SECURITY"): 2201 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2202 2203 index = self._index 2204 key = self._parse_column() 2205 2206 if not self._match(TokenType.EQ): 2207 self._retreat(index) 2208 return self._parse_sequence_properties() 2209 2210 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2211 if isinstance(key, exp.Column): 2212 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2213 2214 value = self._parse_bitwise() or self._parse_var(any_token=True) 2215 2216 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2217 if isinstance(value, exp.Column): 2218 value = exp.var(value.name) 2219 2220 return self.expression(exp.Property, this=key, value=value) 2221 2222 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2223 if self._match_text_seq("BY"): 2224 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2225 2226 self._match(TokenType.ALIAS) 2227 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2228 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2229 2230 return self.expression( 2231 exp.FileFormatProperty, 2232 this=( 2233 self.expression( 2234 exp.InputOutputFormat, 2235 input_format=input_format, 2236 output_format=output_format, 2237 ) 2238 if input_format or output_format 2239 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2240 ), 2241 ) 2242 2243 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2244 field = self._parse_field() 2245 if isinstance(field, exp.Identifier) and not field.quoted: 2246 field = exp.var(field) 2247 2248 return field 2249 2250 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2251 self._match(TokenType.EQ) 2252 self._match(TokenType.ALIAS) 2253 2254 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2255 2256 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2257 properties = [] 2258 while True: 2259 if before: 2260 prop = self._parse_property_before() 2261 else: 2262 prop = self._parse_property() 2263 if not prop: 2264 break 2265 for p in ensure_list(prop): 2266 properties.append(p) 2267 2268 if properties: 2269 return self.expression(exp.Properties, expressions=properties) 2270 2271 return None 2272 2273 
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2274 return self.expression( 2275 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2276 ) 2277 2278 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2279 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2280 security_specifier = self._prev.text.upper() 2281 return self.expression(exp.SecurityProperty, this=security_specifier) 2282 return None 2283 2284 def _parse_settings_property(self) -> exp.SettingsProperty: 2285 return self.expression( 2286 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2287 ) 2288 2289 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2290 if self._index >= 2: 2291 pre_volatile_token = self._tokens[self._index - 2] 2292 else: 2293 pre_volatile_token = None 2294 2295 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2296 return exp.VolatileProperty() 2297 2298 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2299 2300 def _parse_retention_period(self) -> exp.Var: 2301 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2302 number = self._parse_number() 2303 number_str = f"{number} " if number else "" 2304 unit = self._parse_var(any_token=True) 2305 return exp.var(f"{number_str}{unit}") 2306 2307 def _parse_system_versioning_property( 2308 self, with_: bool = False 2309 ) -> exp.WithSystemVersioningProperty: 2310 self._match(TokenType.EQ) 2311 prop = self.expression( 2312 exp.WithSystemVersioningProperty, 2313 **{ # type: ignore 2314 "on": True, 2315 "with": with_, 2316 }, 2317 ) 2318 2319 if self._match_text_seq("OFF"): 2320 prop.set("on", False) 2321 return prop 2322 2323 self._match(TokenType.ON) 2324 if self._match(TokenType.L_PAREN): 2325 while self._curr and not self._match(TokenType.R_PAREN): 2326 if self._match_text_seq("HISTORY_TABLE", "="): 2327 prop.set("this", self._parse_table_parts()) 2328 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2329 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2330 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2331 prop.set("retention_period", self._parse_retention_period()) 2332 2333 self._match(TokenType.COMMA) 2334 2335 return prop 2336 2337 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2338 self._match(TokenType.EQ) 2339 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2340 prop = self.expression(exp.DataDeletionProperty, on=on) 2341 2342 if self._match(TokenType.L_PAREN): 2343 while self._curr and not self._match(TokenType.R_PAREN): 2344 if self._match_text_seq("FILTER_COLUMN", "="): 2345 prop.set("filter_column", self._parse_column()) 2346 elif self._match_text_seq("RETENTION_PERIOD", "="): 2347 prop.set("retention_period", self._parse_retention_period()) 2348 2349 self._match(TokenType.COMMA) 2350 2351 return prop 2352 2353 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2354 kind = "HASH" 2355 expressions: t.Optional[t.List[exp.Expression]] = None 2356 if self._match_text_seq("BY", "HASH"): 2357 expressions = self._parse_wrapped_csv(self._parse_id_var) 2358 elif self._match_text_seq("BY", "RANDOM"): 2359 kind = "RANDOM" 2360 2361 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2362 buckets: t.Optional[exp.Expression] = None 2363 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2364 
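# e.g. "DISTRIBUTED BY HASH (id) BUCKETS 10" captures the literal 10 below, while
# "BUCKETS AUTO" leaves `buckets` as None (illustrative)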
buckets = self._parse_number() 2365 2366 return self.expression( 2367 exp.DistributedByProperty, 2368 expressions=expressions, 2369 kind=kind, 2370 buckets=buckets, 2371 order=self._parse_order(), 2372 ) 2373 2374 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2375 self._match_text_seq("KEY") 2376 expressions = self._parse_wrapped_id_vars() 2377 return self.expression(expr_type, expressions=expressions) 2378 2379 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2380 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2381 prop = self._parse_system_versioning_property(with_=True) 2382 self._match_r_paren() 2383 return prop 2384 2385 if self._match(TokenType.L_PAREN, advance=False): 2386 return self._parse_wrapped_properties() 2387 2388 if self._match_text_seq("JOURNAL"): 2389 return self._parse_withjournaltable() 2390 2391 if self._match_texts(self.VIEW_ATTRIBUTES): 2392 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2393 2394 if self._match_text_seq("DATA"): 2395 return self._parse_withdata(no=False) 2396 elif self._match_text_seq("NO", "DATA"): 2397 return self._parse_withdata(no=True) 2398 2399 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2400 return self._parse_serde_properties(with_=True) 2401 2402 if self._match(TokenType.SCHEMA): 2403 return self.expression( 2404 exp.WithSchemaBindingProperty, 2405 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2406 ) 2407 2408 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2409 return self.expression( 2410 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2411 ) 2412 2413 if not self._next: 2414 return None 2415 2416 return self._parse_withisolatedloading() 2417 2418 def _parse_procedure_option(self) -> exp.Expression | None: 2419 if self._match_text_seq("EXECUTE", "AS"): 2420 return self.expression( 2421 exp.ExecuteAsProperty, 2422 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2423 or self._parse_string(), 2424 ) 2425 2426 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2427 2428 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2429 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2430 self._match(TokenType.EQ) 2431 2432 user = self._parse_id_var() 2433 self._match(TokenType.PARAMETER) 2434 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2435 2436 if not user or not host: 2437 return None 2438 2439 return exp.DefinerProperty(this=f"{user}@{host}") 2440 2441 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2442 self._match(TokenType.TABLE) 2443 self._match(TokenType.EQ) 2444 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2445 2446 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2447 return self.expression(exp.LogProperty, no=no) 2448 2449 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2450 return self.expression(exp.JournalProperty, **kwargs) 2451 2452 def _parse_checksum(self) -> exp.ChecksumProperty: 2453 self._match(TokenType.EQ) 2454 2455 on = None 2456 if self._match(TokenType.ON): 2457 on = True 2458 elif self._match_text_seq("OFF"): 2459 on = False 2460 2461 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2462 2463 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2464 return self.expression( 2465 exp.Cluster, 2466 expressions=( 2467 
self._parse_wrapped_csv(self._parse_ordered) 2468 if wrapped 2469 else self._parse_csv(self._parse_ordered) 2470 ), 2471 ) 2472 2473 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2474 self._match_text_seq("BY") 2475 2476 self._match_l_paren() 2477 expressions = self._parse_csv(self._parse_column) 2478 self._match_r_paren() 2479 2480 if self._match_text_seq("SORTED", "BY"): 2481 self._match_l_paren() 2482 sorted_by = self._parse_csv(self._parse_ordered) 2483 self._match_r_paren() 2484 else: 2485 sorted_by = None 2486 2487 self._match(TokenType.INTO) 2488 buckets = self._parse_number() 2489 self._match_text_seq("BUCKETS") 2490 2491 return self.expression( 2492 exp.ClusteredByProperty, 2493 expressions=expressions, 2494 sorted_by=sorted_by, 2495 buckets=buckets, 2496 ) 2497 2498 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2499 if not self._match_text_seq("GRANTS"): 2500 self._retreat(self._index - 1) 2501 return None 2502 2503 return self.expression(exp.CopyGrantsProperty) 2504 2505 def _parse_freespace(self) -> exp.FreespaceProperty: 2506 self._match(TokenType.EQ) 2507 return self.expression( 2508 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2509 ) 2510 2511 def _parse_mergeblockratio( 2512 self, no: bool = False, default: bool = False 2513 ) -> exp.MergeBlockRatioProperty: 2514 if self._match(TokenType.EQ): 2515 return self.expression( 2516 exp.MergeBlockRatioProperty, 2517 this=self._parse_number(), 2518 percent=self._match(TokenType.PERCENT), 2519 ) 2520 2521 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2522 2523 def _parse_datablocksize( 2524 self, 2525 default: t.Optional[bool] = None, 2526 minimum: t.Optional[bool] = None, 2527 maximum: t.Optional[bool] = None, 2528 ) -> exp.DataBlocksizeProperty: 2529 self._match(TokenType.EQ) 2530 size = self._parse_number() 2531 2532 units = None 2533 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2534 units = self._prev.text 2535 2536 return self.expression( 2537 exp.DataBlocksizeProperty, 2538 size=size, 2539 units=units, 2540 default=default, 2541 minimum=minimum, 2542 maximum=maximum, 2543 ) 2544 2545 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2546 self._match(TokenType.EQ) 2547 always = self._match_text_seq("ALWAYS") 2548 manual = self._match_text_seq("MANUAL") 2549 never = self._match_text_seq("NEVER") 2550 default = self._match_text_seq("DEFAULT") 2551 2552 autotemp = None 2553 if self._match_text_seq("AUTOTEMP"): 2554 autotemp = self._parse_schema() 2555 2556 return self.expression( 2557 exp.BlockCompressionProperty, 2558 always=always, 2559 manual=manual, 2560 never=never, 2561 default=default, 2562 autotemp=autotemp, 2563 ) 2564 2565 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2566 index = self._index 2567 no = self._match_text_seq("NO") 2568 concurrent = self._match_text_seq("CONCURRENT") 2569 2570 if not self._match_text_seq("ISOLATED", "LOADING"): 2571 self._retreat(index) 2572 return None 2573 2574 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2575 return self.expression( 2576 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2577 ) 2578 2579 def _parse_locking(self) -> exp.LockingProperty: 2580 if self._match(TokenType.TABLE): 2581 kind = "TABLE" 2582 elif self._match(TokenType.VIEW): 2583 kind = "VIEW" 2584 elif self._match(TokenType.ROW): 2585 kind = "ROW" 2586 elif 
self._match_text_seq("DATABASE"): 2587 kind = "DATABASE" 2588 else: 2589 kind = None 2590 2591 if kind in ("DATABASE", "TABLE", "VIEW"): 2592 this = self._parse_table_parts() 2593 else: 2594 this = None 2595 2596 if self._match(TokenType.FOR): 2597 for_or_in = "FOR" 2598 elif self._match(TokenType.IN): 2599 for_or_in = "IN" 2600 else: 2601 for_or_in = None 2602 2603 if self._match_text_seq("ACCESS"): 2604 lock_type = "ACCESS" 2605 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2606 lock_type = "EXCLUSIVE" 2607 elif self._match_text_seq("SHARE"): 2608 lock_type = "SHARE" 2609 elif self._match_text_seq("READ"): 2610 lock_type = "READ" 2611 elif self._match_text_seq("WRITE"): 2612 lock_type = "WRITE" 2613 elif self._match_text_seq("CHECKSUM"): 2614 lock_type = "CHECKSUM" 2615 else: 2616 lock_type = None 2617 2618 override = self._match_text_seq("OVERRIDE") 2619 2620 return self.expression( 2621 exp.LockingProperty, 2622 this=this, 2623 kind=kind, 2624 for_or_in=for_or_in, 2625 lock_type=lock_type, 2626 override=override, 2627 ) 2628 2629 def _parse_partition_by(self) -> t.List[exp.Expression]: 2630 if self._match(TokenType.PARTITION_BY): 2631 return self._parse_csv(self._parse_assignment) 2632 return [] 2633 2634 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2635 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2636 if self._match_text_seq("MINVALUE"): 2637 return exp.var("MINVALUE") 2638 if self._match_text_seq("MAXVALUE"): 2639 return exp.var("MAXVALUE") 2640 return self._parse_bitwise() 2641 2642 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2643 expression = None 2644 from_expressions = None 2645 to_expressions = None 2646 2647 if self._match(TokenType.IN): 2648 this = self._parse_wrapped_csv(self._parse_bitwise) 2649 elif self._match(TokenType.FROM): 2650 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2651 self._match_text_seq("TO") 2652 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2653 elif self._match_text_seq("WITH", "(", "MODULUS"): 2654 this = self._parse_number() 2655 self._match_text_seq(",", "REMAINDER") 2656 expression = self._parse_number() 2657 self._match_r_paren() 2658 else: 2659 self.raise_error("Failed to parse partition bound spec.") 2660 2661 return self.expression( 2662 exp.PartitionBoundSpec, 2663 this=this, 2664 expression=expression, 2665 from_expressions=from_expressions, 2666 to_expressions=to_expressions, 2667 ) 2668 2669 # https://www.postgresql.org/docs/current/sql-createtable.html 2670 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2671 if not self._match_text_seq("OF"): 2672 self._retreat(self._index - 1) 2673 return None 2674 2675 this = self._parse_table(schema=True) 2676 2677 if self._match(TokenType.DEFAULT): 2678 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2679 elif self._match_text_seq("FOR", "VALUES"): 2680 expression = self._parse_partition_bound_spec() 2681 else: 2682 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2683 2684 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2685 2686 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2687 self._match(TokenType.EQ) 2688 return self.expression( 2689 exp.PartitionedByProperty, 2690 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2691 ) 2692 2693 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2694 if self._match_text_seq("AND", "STATISTICS"): 2695 
statistics = True 2696 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2697 statistics = False 2698 else: 2699 statistics = None 2700 2701 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2702 2703 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2704 if self._match_text_seq("SQL"): 2705 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2706 return None 2707 2708 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2709 if self._match_text_seq("SQL", "DATA"): 2710 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2711 return None 2712 2713 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2714 if self._match_text_seq("PRIMARY", "INDEX"): 2715 return exp.NoPrimaryIndexProperty() 2716 if self._match_text_seq("SQL"): 2717 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2718 return None 2719 2720 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2721 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2722 return exp.OnCommitProperty() 2723 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2724 return exp.OnCommitProperty(delete=True) 2725 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2726 2727 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2728 if self._match_text_seq("SQL", "DATA"): 2729 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2730 return None 2731 2732 def _parse_distkey(self) -> exp.DistKeyProperty: 2733 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2734 2735 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2736 table = self._parse_table(schema=True) 2737 2738 options = [] 2739 while self._match_texts(("INCLUDING", "EXCLUDING")): 2740 this = self._prev.text.upper() 2741 2742 id_var = self._parse_id_var() 2743 if not id_var: 2744 return None 2745 2746 options.append( 2747 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2748 ) 2749 2750 return self.expression(exp.LikeProperty, this=table, expressions=options) 2751 2752 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2753 return self.expression( 2754 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2755 ) 2756 2757 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2758 self._match(TokenType.EQ) 2759 return self.expression( 2760 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2761 ) 2762 2763 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2764 self._match_text_seq("WITH", "CONNECTION") 2765 return self.expression( 2766 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2767 ) 2768 2769 def _parse_returns(self) -> exp.ReturnsProperty: 2770 value: t.Optional[exp.Expression] 2771 null = None 2772 is_table = self._match(TokenType.TABLE) 2773 2774 if is_table: 2775 if self._match(TokenType.LT): 2776 value = self.expression( 2777 exp.Schema, 2778 this="TABLE", 2779 expressions=self._parse_csv(self._parse_struct_types), 2780 ) 2781 if not self._match(TokenType.GT): 2782 self.raise_error("Expecting >") 2783 else: 2784 value = self._parse_schema(exp.var("TABLE")) 2785 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2786 null = True 2787 value = None 2788 else: 2789 value = self._parse_types() 2790 2791 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2792 2793 def _parse_describe(self) -> exp.Describe: 2794 kind = self._match_set(self.CREATABLES) and self._prev.text 2795 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2796 if self._match(TokenType.DOT): 2797 style = None 2798 self._retreat(self._index - 2) 2799 2800 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2801 2802 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2803 this = self._parse_statement() 2804 else: 2805 this = self._parse_table(schema=True) 2806 2807 properties = self._parse_properties() 2808 expressions = properties.expressions if properties else None 2809 partition = self._parse_partition() 2810 return self.expression( 2811 exp.Describe, 2812 this=this, 2813 style=style, 2814 kind=kind, 2815 expressions=expressions, 2816 partition=partition, 2817 format=format, 2818 ) 2819 2820 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2821 kind = self._prev.text.upper() 2822 expressions = [] 2823 2824 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2825 if self._match(TokenType.WHEN): 2826 expression = self._parse_disjunction() 2827 self._match(TokenType.THEN) 2828 else: 2829 expression = None 2830 2831 else_ = self._match(TokenType.ELSE) 2832 2833 if not self._match(TokenType.INTO): 2834 return None 2835 2836 return self.expression( 2837 exp.ConditionalInsert, 2838 this=self.expression( 2839 exp.Insert, 2840 this=self._parse_table(schema=True), 2841 expression=self._parse_derived_table_values(), 2842 ), 2843 expression=expression, 2844 else_=else_, 2845 ) 2846 2847 expression = parse_conditional_insert() 2848 while expression is not None: 2849 expressions.append(expression) 2850 expression = parse_conditional_insert() 2851 2852 return self.expression( 2853 exp.MultitableInserts, 2854 kind=kind, 2855 comments=comments, 2856 expressions=expressions, 2857 source=self._parse_table(), 2858 ) 2859 2860 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2861 comments = [] 2862 hint = self._parse_hint() 2863 overwrite = self._match(TokenType.OVERWRITE) 2864 ignore = self._match(TokenType.IGNORE) 2865 local = self._match_text_seq("LOCAL") 2866 alternative = None 2867 is_function = None 2868 2869 if self._match_text_seq("DIRECTORY"): 2870 this: t.Optional[exp.Expression] = self.expression( 2871 exp.Directory, 2872 this=self._parse_var_or_string(), 2873 local=local, 2874 row_format=self._parse_row_format(match_row=True), 2875 ) 2876 else: 2877 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2878 comments += ensure_list(self._prev_comments) 2879 return self._parse_multitable_inserts(comments) 2880 2881 if self._match(TokenType.OR): 2882 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2883 2884 self._match(TokenType.INTO) 2885 comments += ensure_list(self._prev_comments) 2886 self._match(TokenType.TABLE) 2887 is_function = self._match(TokenType.FUNCTION) 2888 2889 this = ( 2890 self._parse_table(schema=True, parse_partition=True) 2891 if not is_function 2892 else self._parse_function() 2893 ) 2894 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2895 this.set("alias", self._parse_table_alias()) 2896 2897 returning = self._parse_returning() 2898 2899 return self.expression( 2900 exp.Insert, 2901 comments=comments, 2902 hint=hint, 2903 is_function=is_function, 2904 this=this, 
2905 stored=self._match_text_seq("STORED") and self._parse_stored(), 2906 by_name=self._match_text_seq("BY", "NAME"), 2907 exists=self._parse_exists(), 2908 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2909 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2910 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2911 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2912 conflict=self._parse_on_conflict(), 2913 returning=returning or self._parse_returning(), 2914 overwrite=overwrite, 2915 alternative=alternative, 2916 ignore=ignore, 2917 source=self._match(TokenType.TABLE) and self._parse_table(), 2918 ) 2919 2920 def _parse_kill(self) -> exp.Kill: 2921 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2922 2923 return self.expression( 2924 exp.Kill, 2925 this=self._parse_primary(), 2926 kind=kind, 2927 ) 2928 2929 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2930 conflict = self._match_text_seq("ON", "CONFLICT") 2931 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2932 2933 if not conflict and not duplicate: 2934 return None 2935 2936 conflict_keys = None 2937 constraint = None 2938 2939 if conflict: 2940 if self._match_text_seq("ON", "CONSTRAINT"): 2941 constraint = self._parse_id_var() 2942 elif self._match(TokenType.L_PAREN): 2943 conflict_keys = self._parse_csv(self._parse_id_var) 2944 self._match_r_paren() 2945 2946 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2947 if self._prev.token_type == TokenType.UPDATE: 2948 self._match(TokenType.SET) 2949 expressions = self._parse_csv(self._parse_equality) 2950 else: 2951 expressions = None 2952 2953 return self.expression( 2954 exp.OnConflict, 2955 duplicate=duplicate, 2956 expressions=expressions, 2957 action=action, 2958 conflict_keys=conflict_keys, 2959 constraint=constraint, 2960 where=self._parse_where(), 2961 ) 2962 2963 def _parse_returning(self) -> t.Optional[exp.Returning]: 2964 if not self._match(TokenType.RETURNING): 2965 return None 2966 return self.expression( 2967 exp.Returning, 2968 expressions=self._parse_csv(self._parse_expression), 2969 into=self._match(TokenType.INTO) and self._parse_table_part(), 2970 ) 2971 2972 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2973 if not self._match(TokenType.FORMAT): 2974 return None 2975 return self._parse_row_format() 2976 2977 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2978 index = self._index 2979 with_ = with_ or self._match_text_seq("WITH") 2980 2981 if not self._match(TokenType.SERDE_PROPERTIES): 2982 self._retreat(index) 2983 return None 2984 return self.expression( 2985 exp.SerdeProperties, 2986 **{ # type: ignore 2987 "expressions": self._parse_wrapped_properties(), 2988 "with": with_, 2989 }, 2990 ) 2991 2992 def _parse_row_format( 2993 self, match_row: bool = False 2994 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2995 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2996 return None 2997 2998 if self._match_text_seq("SERDE"): 2999 this = self._parse_string() 3000 3001 serde_properties = self._parse_serde_properties() 3002 3003 return self.expression( 3004 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3005 ) 3006 3007 self._match_text_seq("DELIMITED") 3008 3009 kwargs = {} 3010 3011 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3012 kwargs["fields"] = self._parse_string() 3013 if self._match_text_seq("ESCAPED", "BY"): 3014 kwargs["escaped"] = self._parse_string() 3015 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3016 kwargs["collection_items"] = self._parse_string() 3017 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3018 kwargs["map_keys"] = self._parse_string() 3019 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3020 kwargs["lines"] = self._parse_string() 3021 if self._match_text_seq("NULL", "DEFINED", "AS"): 3022 kwargs["null"] = self._parse_string() 3023 3024 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3025 3026 def _parse_load(self) -> exp.LoadData | exp.Command: 3027 if self._match_text_seq("DATA"): 3028 local = self._match_text_seq("LOCAL") 3029 self._match_text_seq("INPATH") 3030 inpath = self._parse_string() 3031 overwrite = self._match(TokenType.OVERWRITE) 3032 self._match_pair(TokenType.INTO, TokenType.TABLE) 3033 3034 return self.expression( 3035 exp.LoadData, 3036 this=self._parse_table(schema=True), 3037 local=local, 3038 overwrite=overwrite, 3039 inpath=inpath, 3040 partition=self._parse_partition(), 3041 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3042 serde=self._match_text_seq("SERDE") and self._parse_string(), 3043 ) 3044 return self._parse_as_command(self._prev) 3045 3046 def _parse_delete(self) -> exp.Delete: 3047 # This handles MySQL's "Multiple-Table Syntax" 3048 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3049 tables = None 3050 if not self._match(TokenType.FROM, advance=False): 3051 tables = self._parse_csv(self._parse_table) or None 3052 3053 returning = self._parse_returning() 3054 3055 return self.expression( 3056 exp.Delete, 3057 tables=tables, 3058 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3059 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3060 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3061 where=self._parse_where(), 3062 returning=returning or self._parse_returning(), 3063 limit=self._parse_limit(), 3064 ) 3065 3066 def _parse_update(self) -> exp.Update: 3067 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3068 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3069 returning = self._parse_returning() 3070 return self.expression( 3071 exp.Update, 3072 **{ # type: ignore 3073 "this": this, 3074 "expressions": expressions, 3075 "from": self._parse_from(joins=True), 3076 "where": self._parse_where(), 3077 "returning": returning or self._parse_returning(), 3078 "order": self._parse_order(), 3079 "limit": self._parse_limit(), 3080 }, 3081 ) 3082 3083 def _parse_use(self) -> exp.Use: 3084 return self.expression( 3085 exp.Use, 3086 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3087 this=self._parse_table(schema=False), 3088 ) 3089 3090 def _parse_uncache(self) -> exp.Uncache: 3091 if not self._match(TokenType.TABLE): 3092 self.raise_error("Expecting TABLE after UNCACHE") 3093 3094 return self.expression( 3095 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3096 ) 3097 3098 def _parse_cache(self) -> exp.Cache: 3099 lazy = self._match_text_seq("LAZY") 3100 self._match(TokenType.TABLE) 3101 table = self._parse_table(schema=True) 3102 3103 options = [] 3104 if self._match_text_seq("OPTIONS"): 3105 self._match_l_paren() 3106 k = 
self._parse_string()
3107 self._match(TokenType.EQ)
3108 v = self._parse_string()
3109 options = [k, v]
3110 self._match_r_paren()
3111
3112 self._match(TokenType.ALIAS)
3113 return self.expression(
3114 exp.Cache,
3115 this=table,
3116 lazy=lazy,
3117 options=options,
3118 expression=self._parse_select(nested=True),
3119 )
3120
3121 def _parse_partition(self) -> t.Optional[exp.Partition]:
3122 if not self._match_texts(self.PARTITION_KEYWORDS):
3123 return None
3124
3125 return self.expression(
3126 exp.Partition,
3127 subpartition=self._prev.text.upper() == "SUBPARTITION",
3128 expressions=self._parse_wrapped_csv(self._parse_assignment),
3129 )
3130
3131 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
3132 def _parse_value_expression() -> t.Optional[exp.Expression]:
3133 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
3134 return exp.var(self._prev.text.upper())
3135 return self._parse_expression()
3136
3137 if self._match(TokenType.L_PAREN):
3138 expressions = self._parse_csv(_parse_value_expression)
3139 self._match_r_paren()
3140 return self.expression(exp.Tuple, expressions=expressions)
3141
3142 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
3143 expression = self._parse_expression()
3144 if expression:
3145 return self.expression(exp.Tuple, expressions=[expression])
3146 return None
3147
3148 def _parse_projections(self) -> t.List[exp.Expression]:
3149 return self._parse_expressions()
3150
3151 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
3152 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
3153 this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
3154 is_unpivot=self._prev.token_type == TokenType.UNPIVOT
3155 )
3156 elif self._match(TokenType.FROM):
3157 from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
3158 # Support parentheses for duckdb FROM-first syntax
3159 select = self._parse_select()
3160 if select:
3161 select.set("from", from_)
3162 this = select
3163 else:
3164 this = exp.select("*").from_(t.cast(exp.From, from_))
3165 else:
3166 this = (
3167 self._parse_table(consume_pipe=True)
3168 if table
3169 else self._parse_select(nested=True, parse_set_operation=False)
3170 )
3171
3172 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
3173 # in case a modifier (e.g.
join) is following 3174 if table and isinstance(this, exp.Values) and this.alias: 3175 alias = this.args["alias"].pop() 3176 this = exp.Table(this=this, alias=alias) 3177 3178 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3179 3180 return this 3181 3182 def _parse_select( 3183 self, 3184 nested: bool = False, 3185 table: bool = False, 3186 parse_subquery_alias: bool = True, 3187 parse_set_operation: bool = True, 3188 consume_pipe: bool = True, 3189 ) -> t.Optional[exp.Expression]: 3190 query = self._parse_select_query( 3191 nested=nested, 3192 table=table, 3193 parse_subquery_alias=parse_subquery_alias, 3194 parse_set_operation=parse_set_operation, 3195 ) 3196 3197 if ( 3198 consume_pipe 3199 and self._match(TokenType.PIPE_GT, advance=False) 3200 and isinstance(query, exp.Query) 3201 ): 3202 query = self._parse_pipe_syntax_query(query) 3203 query = query.subquery(copy=False) if query and table else query 3204 3205 return query 3206 3207 def _parse_select_query( 3208 self, 3209 nested: bool = False, 3210 table: bool = False, 3211 parse_subquery_alias: bool = True, 3212 parse_set_operation: bool = True, 3213 ) -> t.Optional[exp.Expression]: 3214 cte = self._parse_with() 3215 3216 if cte: 3217 this = self._parse_statement() 3218 3219 if not this: 3220 self.raise_error("Failed to parse any statement following CTE") 3221 return cte 3222 3223 if "with" in this.arg_types: 3224 this.set("with", cte) 3225 else: 3226 self.raise_error(f"{this.key} does not support CTE") 3227 this = cte 3228 3229 return this 3230 3231 # duckdb supports leading with FROM x 3232 from_ = ( 3233 self._parse_from(consume_pipe=True) 3234 if self._match(TokenType.FROM, advance=False) 3235 else None 3236 ) 3237 3238 if self._match(TokenType.SELECT): 3239 comments = self._prev_comments 3240 3241 hint = self._parse_hint() 3242 3243 if self._next and not self._next.token_type == TokenType.DOT: 3244 all_ = self._match(TokenType.ALL) 3245 distinct = self._match_set(self.DISTINCT_TOKENS) 3246 else: 3247 all_, distinct = None, None 3248 3249 kind = ( 3250 self._match(TokenType.ALIAS) 3251 and self._match_texts(("STRUCT", "VALUE")) 3252 and self._prev.text.upper() 3253 ) 3254 3255 if distinct: 3256 distinct = self.expression( 3257 exp.Distinct, 3258 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3259 ) 3260 3261 if all_ and distinct: 3262 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3263 3264 operation_modifiers = [] 3265 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3266 operation_modifiers.append(exp.var(self._prev.text.upper())) 3267 3268 limit = self._parse_limit(top=True) 3269 projections = self._parse_projections() 3270 3271 this = self.expression( 3272 exp.Select, 3273 kind=kind, 3274 hint=hint, 3275 distinct=distinct, 3276 expressions=projections, 3277 limit=limit, 3278 operation_modifiers=operation_modifiers or None, 3279 ) 3280 this.comments = comments 3281 3282 into = self._parse_into() 3283 if into: 3284 this.set("into", into) 3285 3286 if not from_: 3287 from_ = self._parse_from() 3288 3289 if from_: 3290 this.set("from", from_) 3291 3292 this = self._parse_query_modifiers(this) 3293 elif (table or nested) and self._match(TokenType.L_PAREN): 3294 this = self._parse_wrapped_select(table=table) 3295 3296 # We return early here so that the UNION isn't attached to the subquery by the 3297 # following call to _parse_set_operations, but instead becomes the parent node 3298 self._match_r_paren() 3299 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3300 elif self._match(TokenType.VALUES, advance=False): 3301 this = self._parse_derived_table_values() 3302 elif from_: 3303 this = exp.select("*").from_(from_.this, copy=False) 3304 elif self._match(TokenType.SUMMARIZE): 3305 table = self._match(TokenType.TABLE) 3306 this = self._parse_select() or self._parse_string() or self._parse_table() 3307 return self.expression(exp.Summarize, this=this, table=table) 3308 elif self._match(TokenType.DESCRIBE): 3309 this = self._parse_describe() 3310 elif self._match_text_seq("STREAM"): 3311 this = self._parse_function() 3312 if this: 3313 this = self.expression(exp.Stream, this=this) 3314 else: 3315 self._retreat(self._index - 1) 3316 else: 3317 this = None 3318 3319 return self._parse_set_operations(this) if parse_set_operation else this 3320 3321 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3322 self._match_text_seq("SEARCH") 3323 3324 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3325 3326 if not kind: 3327 return None 3328 3329 self._match_text_seq("FIRST", "BY") 3330 3331 return self.expression( 3332 exp.RecursiveWithSearch, 3333 kind=kind, 3334 this=self._parse_id_var(), 3335 expression=self._match_text_seq("SET") and self._parse_id_var(), 3336 using=self._match_text_seq("USING") and self._parse_id_var(), 3337 ) 3338 3339 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3340 if not skip_with_token and not self._match(TokenType.WITH): 3341 return None 3342 3343 comments = self._prev_comments 3344 recursive = self._match(TokenType.RECURSIVE) 3345 3346 last_comments = None 3347 expressions = [] 3348 while True: 3349 cte = self._parse_cte() 3350 if isinstance(cte, exp.CTE): 3351 expressions.append(cte) 3352 if last_comments: 3353 cte.add_comments(last_comments) 3354 3355 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3356 break 3357 else: 3358 self._match(TokenType.WITH) 3359 3360 last_comments = self._prev_comments 3361 3362 return self.expression( 3363 exp.With, 3364 comments=comments, 3365 expressions=expressions, 3366 recursive=recursive, 3367 search=self._parse_recursive_with_search(), 3368 ) 3369 3370 def _parse_cte(self) -> t.Optional[exp.CTE]: 3371 index = self._index 3372 3373 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3374 if not alias or not alias.this: 3375 self.raise_error("Expected CTE to have alias") 3376 3377 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3378 self._retreat(index) 3379 return None 3380 3381 comments = self._prev_comments 3382 3383 if self._match_text_seq("NOT", "MATERIALIZED"): 3384 materialized = False 3385 elif self._match_text_seq("MATERIALIZED"): 3386 materialized = True 3387 else: 3388 materialized = None 3389 3390 cte = self.expression( 3391 exp.CTE, 3392 this=self._parse_wrapped(self._parse_statement), 3393 alias=alias, 3394 materialized=materialized, 3395 comments=comments, 3396 ) 3397 3398 if isinstance(cte.this, exp.Values): 3399 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3400 3401 return cte 3402 3403 def _parse_table_alias( 3404 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3405 ) -> t.Optional[exp.TableAlias]: 3406 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3407 # so this section tries to parse the clause version and if it fails, it treats the token 3408 # as an identifier (alias) 3409 if 
self._can_parse_limit_or_offset(): 3410 return None 3411 3412 any_token = self._match(TokenType.ALIAS) 3413 alias = ( 3414 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3415 or self._parse_string_as_identifier() 3416 ) 3417 3418 index = self._index 3419 if self._match(TokenType.L_PAREN): 3420 columns = self._parse_csv(self._parse_function_parameter) 3421 self._match_r_paren() if columns else self._retreat(index) 3422 else: 3423 columns = None 3424 3425 if not alias and not columns: 3426 return None 3427 3428 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3429 3430 # We bubble up comments from the Identifier to the TableAlias 3431 if isinstance(alias, exp.Identifier): 3432 table_alias.add_comments(alias.pop_comments()) 3433 3434 return table_alias 3435 3436 def _parse_subquery( 3437 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3438 ) -> t.Optional[exp.Subquery]: 3439 if not this: 3440 return None 3441 3442 return self.expression( 3443 exp.Subquery, 3444 this=this, 3445 pivots=self._parse_pivots(), 3446 alias=self._parse_table_alias() if parse_alias else None, 3447 sample=self._parse_table_sample(), 3448 ) 3449 3450 def _implicit_unnests_to_explicit(self, this: E) -> E: 3451 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3452 3453 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3454 for i, join in enumerate(this.args.get("joins") or []): 3455 table = join.this 3456 normalized_table = table.copy() 3457 normalized_table.meta["maybe_column"] = True 3458 normalized_table = _norm(normalized_table, dialect=self.dialect) 3459 3460 if isinstance(table, exp.Table) and not join.args.get("on"): 3461 if normalized_table.parts[0].name in refs: 3462 table_as_column = table.to_column() 3463 unnest = exp.Unnest(expressions=[table_as_column]) 3464 3465 # Table.to_column creates a parent Alias node that we want to convert to 3466 # a TableAlias and attach to the Unnest, so it matches the parser's output 3467 if isinstance(table.args.get("alias"), exp.TableAlias): 3468 table_as_column.replace(table_as_column.this) 3469 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3470 3471 table.replace(unnest) 3472 3473 refs.add(normalized_table.alias_or_name) 3474 3475 return this 3476 3477 def _parse_query_modifiers( 3478 self, this: t.Optional[exp.Expression] 3479 ) -> t.Optional[exp.Expression]: 3480 if isinstance(this, self.MODIFIABLES): 3481 for join in self._parse_joins(): 3482 this.append("joins", join) 3483 for lateral in iter(self._parse_lateral, None): 3484 this.append("laterals", lateral) 3485 3486 while True: 3487 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3488 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3489 key, expression = parser(self) 3490 3491 if expression: 3492 this.set(key, expression) 3493 if key == "limit": 3494 offset = expression.args.pop("offset", None) 3495 3496 if offset: 3497 offset = exp.Offset(expression=offset) 3498 this.set("offset", offset) 3499 3500 limit_by_expressions = expression.expressions 3501 expression.set("expressions", None) 3502 offset.set("expressions", limit_by_expressions) 3503 continue 3504 break 3505 3506 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3507 this = self._implicit_unnests_to_explicit(this) 3508 3509 return this 3510 3511 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3512 start = self._curr 3513 while 
self._curr: 3514 self._advance() 3515 3516 end = self._tokens[self._index - 1] 3517 return exp.Hint(expressions=[self._find_sql(start, end)]) 3518 3519 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3520 return self._parse_function_call() 3521 3522 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3523 start_index = self._index 3524 should_fallback_to_string = False 3525 3526 hints = [] 3527 try: 3528 for hint in iter( 3529 lambda: self._parse_csv( 3530 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3531 ), 3532 [], 3533 ): 3534 hints.extend(hint) 3535 except ParseError: 3536 should_fallback_to_string = True 3537 3538 if should_fallback_to_string or self._curr: 3539 self._retreat(start_index) 3540 return self._parse_hint_fallback_to_string() 3541 3542 return self.expression(exp.Hint, expressions=hints) 3543 3544 def _parse_hint(self) -> t.Optional[exp.Hint]: 3545 if self._match(TokenType.HINT) and self._prev_comments: 3546 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3547 3548 return None 3549 3550 def _parse_into(self) -> t.Optional[exp.Into]: 3551 if not self._match(TokenType.INTO): 3552 return None 3553 3554 temp = self._match(TokenType.TEMPORARY) 3555 unlogged = self._match_text_seq("UNLOGGED") 3556 self._match(TokenType.TABLE) 3557 3558 return self.expression( 3559 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3560 ) 3561 3562 def _parse_from( 3563 self, 3564 joins: bool = False, 3565 skip_from_token: bool = False, 3566 consume_pipe: bool = False, 3567 ) -> t.Optional[exp.From]: 3568 if not skip_from_token and not self._match(TokenType.FROM): 3569 return None 3570 3571 return self.expression( 3572 exp.From, 3573 comments=self._prev_comments, 3574 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3575 ) 3576 3577 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3578 return self.expression( 3579 exp.MatchRecognizeMeasure, 3580 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3581 this=self._parse_expression(), 3582 ) 3583 3584 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3585 if not self._match(TokenType.MATCH_RECOGNIZE): 3586 return None 3587 3588 self._match_l_paren() 3589 3590 partition = self._parse_partition_by() 3591 order = self._parse_order() 3592 3593 measures = ( 3594 self._parse_csv(self._parse_match_recognize_measure) 3595 if self._match_text_seq("MEASURES") 3596 else None 3597 ) 3598 3599 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3600 rows = exp.var("ONE ROW PER MATCH") 3601 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3602 text = "ALL ROWS PER MATCH" 3603 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3604 text += " SHOW EMPTY MATCHES" 3605 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3606 text += " OMIT EMPTY MATCHES" 3607 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3608 text += " WITH UNMATCHED ROWS" 3609 rows = exp.var(text) 3610 else: 3611 rows = None 3612 3613 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3614 text = "AFTER MATCH SKIP" 3615 if self._match_text_seq("PAST", "LAST", "ROW"): 3616 text += " PAST LAST ROW" 3617 elif self._match_text_seq("TO", "NEXT", "ROW"): 3618 text += " TO NEXT ROW" 3619 elif self._match_text_seq("TO", "FIRST"): 3620 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3621 elif self._match_text_seq("TO", "LAST"): 3622 text += f" TO LAST 
{self._advance_any().text}" # type: ignore 3623 after = exp.var(text) 3624 else: 3625 after = None 3626 3627 if self._match_text_seq("PATTERN"): 3628 self._match_l_paren() 3629 3630 if not self._curr: 3631 self.raise_error("Expecting )", self._curr) 3632 3633 paren = 1 3634 start = self._curr 3635 3636 while self._curr and paren > 0: 3637 if self._curr.token_type == TokenType.L_PAREN: 3638 paren += 1 3639 if self._curr.token_type == TokenType.R_PAREN: 3640 paren -= 1 3641 3642 end = self._prev 3643 self._advance() 3644 3645 if paren > 0: 3646 self.raise_error("Expecting )", self._curr) 3647 3648 pattern = exp.var(self._find_sql(start, end)) 3649 else: 3650 pattern = None 3651 3652 define = ( 3653 self._parse_csv(self._parse_name_as_expression) 3654 if self._match_text_seq("DEFINE") 3655 else None 3656 ) 3657 3658 self._match_r_paren() 3659 3660 return self.expression( 3661 exp.MatchRecognize, 3662 partition_by=partition, 3663 order=order, 3664 measures=measures, 3665 rows=rows, 3666 after=after, 3667 pattern=pattern, 3668 define=define, 3669 alias=self._parse_table_alias(), 3670 ) 3671 3672 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3673 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3674 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3675 cross_apply = False 3676 3677 if cross_apply is not None: 3678 this = self._parse_select(table=True) 3679 view = None 3680 outer = None 3681 elif self._match(TokenType.LATERAL): 3682 this = self._parse_select(table=True) 3683 view = self._match(TokenType.VIEW) 3684 outer = self._match(TokenType.OUTER) 3685 else: 3686 return None 3687 3688 if not this: 3689 this = ( 3690 self._parse_unnest() 3691 or self._parse_function() 3692 or self._parse_id_var(any_token=False) 3693 ) 3694 3695 while self._match(TokenType.DOT): 3696 this = exp.Dot( 3697 this=this, 3698 expression=self._parse_function() or self._parse_id_var(any_token=False), 3699 ) 3700 3701 ordinality: t.Optional[bool] = None 3702 3703 if view: 3704 table = self._parse_id_var(any_token=False) 3705 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3706 table_alias: t.Optional[exp.TableAlias] = self.expression( 3707 exp.TableAlias, this=table, columns=columns 3708 ) 3709 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3710 # We move the alias from the lateral's child node to the lateral itself 3711 table_alias = this.args["alias"].pop() 3712 else: 3713 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3714 table_alias = self._parse_table_alias() 3715 3716 return self.expression( 3717 exp.Lateral, 3718 this=this, 3719 view=view, 3720 outer=outer, 3721 alias=table_alias, 3722 cross_apply=cross_apply, 3723 ordinality=ordinality, 3724 ) 3725 3726 def _parse_join_parts( 3727 self, 3728 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3729 return ( 3730 self._match_set(self.JOIN_METHODS) and self._prev, 3731 self._match_set(self.JOIN_SIDES) and self._prev, 3732 self._match_set(self.JOIN_KINDS) and self._prev, 3733 ) 3734 3735 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3736 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3737 this = self._parse_column() 3738 if isinstance(this, exp.Column): 3739 return this.this 3740 return this 3741 3742 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3743 3744 def _parse_join( 3745 self, skip_join_token: bool = False, parse_bracket: bool = False 3746 ) -> 
t.Optional[exp.Join]: 3747 if self._match(TokenType.COMMA): 3748 table = self._try_parse(self._parse_table) 3749 cross_join = self.expression(exp.Join, this=table) if table else None 3750 3751 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3752 cross_join.set("kind", "CROSS") 3753 3754 return cross_join 3755 3756 index = self._index 3757 method, side, kind = self._parse_join_parts() 3758 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3759 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3760 3761 if not skip_join_token and not join: 3762 self._retreat(index) 3763 kind = None 3764 method = None 3765 side = None 3766 3767 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3768 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3769 3770 if not skip_join_token and not join and not outer_apply and not cross_apply: 3771 return None 3772 3773 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3774 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3775 kwargs["expressions"] = self._parse_csv( 3776 lambda: self._parse_table(parse_bracket=parse_bracket) 3777 ) 3778 3779 if method: 3780 kwargs["method"] = method.text 3781 if side: 3782 kwargs["side"] = side.text 3783 if kind: 3784 kwargs["kind"] = kind.text 3785 if hint: 3786 kwargs["hint"] = hint 3787 3788 if self._match(TokenType.MATCH_CONDITION): 3789 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3790 3791 if self._match(TokenType.ON): 3792 kwargs["on"] = self._parse_assignment() 3793 elif self._match(TokenType.USING): 3794 kwargs["using"] = self._parse_using_identifiers() 3795 elif ( 3796 not (outer_apply or cross_apply) 3797 and not isinstance(kwargs["this"], exp.Unnest) 3798 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3799 ): 3800 index = self._index 3801 joins: t.Optional[list] = list(self._parse_joins()) 3802 3803 if joins and self._match(TokenType.ON): 3804 kwargs["on"] = self._parse_assignment() 3805 elif joins and self._match(TokenType.USING): 3806 kwargs["using"] = self._parse_using_identifiers() 3807 else: 3808 joins = None 3809 self._retreat(index) 3810 3811 kwargs["this"].set("joins", joins if joins else None) 3812 3813 kwargs["pivots"] = self._parse_pivots() 3814 3815 comments = [c for token in (method, side, kind) if token for c in token.comments] 3816 return self.expression(exp.Join, comments=comments, **kwargs) 3817 3818 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3819 this = self._parse_assignment() 3820 3821 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3822 return this 3823 3824 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3825 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3826 3827 return this 3828 3829 def _parse_index_params(self) -> exp.IndexParameters: 3830 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3831 3832 if self._match(TokenType.L_PAREN, advance=False): 3833 columns = self._parse_wrapped_csv(self._parse_with_operator) 3834 else: 3835 columns = None 3836 3837 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3838 partition_by = self._parse_partition_by() 3839 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3840 tablespace = ( 3841 self._parse_var(any_token=True) 3842 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3843 else None 3844 ) 3845 where = self._parse_where() 3846 3847 on = self._parse_field() if self._match(TokenType.ON) else None 3848 3849 return self.expression( 3850 exp.IndexParameters, 3851 using=using, 3852 columns=columns, 3853 include=include, 3854 partition_by=partition_by, 3855 where=where, 3856 with_storage=with_storage, 3857 tablespace=tablespace, 3858 on=on, 3859 ) 3860 3861 def _parse_index( 3862 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3863 ) -> t.Optional[exp.Index]: 3864 if index or anonymous: 3865 unique = None 3866 primary = None 3867 amp = None 3868 3869 self._match(TokenType.ON) 3870 self._match(TokenType.TABLE) # hive 3871 table = self._parse_table_parts(schema=True) 3872 else: 3873 unique = self._match(TokenType.UNIQUE) 3874 primary = self._match_text_seq("PRIMARY") 3875 amp = self._match_text_seq("AMP") 3876 3877 if not self._match(TokenType.INDEX): 3878 return None 3879 3880 index = self._parse_id_var() 3881 table = None 3882 3883 params = self._parse_index_params() 3884 3885 return self.expression( 3886 exp.Index, 3887 this=index, 3888 table=table, 3889 unique=unique, 3890 primary=primary, 3891 amp=amp, 3892 params=params, 3893 ) 3894 3895 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3896 hints: t.List[exp.Expression] = [] 3897 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3898 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3899 hints.append( 3900 self.expression( 3901 exp.WithTableHint, 3902 expressions=self._parse_csv( 3903 lambda: self._parse_function() or self._parse_var(any_token=True) 3904 ), 3905 ) 3906 ) 3907 self._match_r_paren() 3908 else: 3909 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3910 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3911 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3912 3913 self._match_set((TokenType.INDEX, TokenType.KEY)) 3914 if self._match(TokenType.FOR): 3915 hint.set("target", self._advance_any() and self._prev.text.upper()) 3916 3917 hint.set("expressions", self._parse_wrapped_id_vars()) 3918 hints.append(hint) 3919 3920 return hints or None 3921 3922 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3923 return ( 3924 (not schema and self._parse_function(optional_parens=False)) 3925 or self._parse_id_var(any_token=False) 3926 or self._parse_string_as_identifier() 3927 or self._parse_placeholder() 3928 ) 3929 3930 def _parse_table_parts( 3931 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3932 ) -> exp.Table: 3933 catalog = None 3934 db = None 3935 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3936 3937 while self._match(TokenType.DOT): 3938 if catalog: 3939 # This allows nesting the table in arbitrarily many dot expressions if needed 3940 table = self.expression( 3941 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3942 ) 3943 else: 3944 catalog = db 3945 db = table 3946 # "" used for tsql FROM a..b case 3947 table = self._parse_table_part(schema=schema) or "" 3948 3949 if ( 3950 wildcard 3951 and self._is_connected() 3952 and (isinstance(table, exp.Identifier) or not table) 3953 and self._match(TokenType.STAR) 3954 ): 3955 if isinstance(table, exp.Identifier): 3956 table.args["this"] += "*" 3957 else: 3958 table = exp.Identifier(this="*") 3959 3960 # We bubble up comments from the Identifier to the Table 
3961 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3962 3963 if is_db_reference: 3964 catalog = db 3965 db = table 3966 table = None 3967 3968 if not table and not is_db_reference: 3969 self.raise_error(f"Expected table name but got {self._curr}") 3970 if not db and is_db_reference: 3971 self.raise_error(f"Expected database name but got {self._curr}") 3972 3973 table = self.expression( 3974 exp.Table, 3975 comments=comments, 3976 this=table, 3977 db=db, 3978 catalog=catalog, 3979 ) 3980 3981 changes = self._parse_changes() 3982 if changes: 3983 table.set("changes", changes) 3984 3985 at_before = self._parse_historical_data() 3986 if at_before: 3987 table.set("when", at_before) 3988 3989 pivots = self._parse_pivots() 3990 if pivots: 3991 table.set("pivots", pivots) 3992 3993 return table 3994 3995 def _parse_table( 3996 self, 3997 schema: bool = False, 3998 joins: bool = False, 3999 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4000 parse_bracket: bool = False, 4001 is_db_reference: bool = False, 4002 parse_partition: bool = False, 4003 consume_pipe: bool = False, 4004 ) -> t.Optional[exp.Expression]: 4005 lateral = self._parse_lateral() 4006 if lateral: 4007 return lateral 4008 4009 unnest = self._parse_unnest() 4010 if unnest: 4011 return unnest 4012 4013 values = self._parse_derived_table_values() 4014 if values: 4015 return values 4016 4017 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4018 if subquery: 4019 if not subquery.args.get("pivots"): 4020 subquery.set("pivots", self._parse_pivots()) 4021 return subquery 4022 4023 bracket = parse_bracket and self._parse_bracket(None) 4024 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4025 4026 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4027 self._parse_table 4028 ) 4029 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4030 4031 only = self._match(TokenType.ONLY) 4032 4033 this = t.cast( 4034 exp.Expression, 4035 bracket 4036 or rows_from 4037 or self._parse_bracket( 4038 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4039 ), 4040 ) 4041 4042 if only: 4043 this.set("only", only) 4044 4045 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4046 self._match_text_seq("*") 4047 4048 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4049 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4050 this.set("partition", self._parse_partition()) 4051 4052 if schema: 4053 return self._parse_schema(this=this) 4054 4055 version = self._parse_version() 4056 4057 if version: 4058 this.set("version", version) 4059 4060 if self.dialect.ALIAS_POST_TABLESAMPLE: 4061 this.set("sample", self._parse_table_sample()) 4062 4063 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4064 if alias: 4065 this.set("alias", alias) 4066 4067 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4068 return self.expression( 4069 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4070 ) 4071 4072 this.set("hints", self._parse_table_hints()) 4073 4074 if not this.args.get("pivots"): 4075 this.set("pivots", self._parse_pivots()) 4076 4077 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4078 this.set("sample", self._parse_table_sample()) 4079 4080 if joins: 4081 for join in self._parse_joins(): 4082 this.append("joins", join) 4083 4084 if 
self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4085 this.set("ordinality", True) 4086 this.set("alias", self._parse_table_alias()) 4087 4088 return this 4089 4090 def _parse_version(self) -> t.Optional[exp.Version]: 4091 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4092 this = "TIMESTAMP" 4093 elif self._match(TokenType.VERSION_SNAPSHOT): 4094 this = "VERSION" 4095 else: 4096 return None 4097 4098 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4099 kind = self._prev.text.upper() 4100 start = self._parse_bitwise() 4101 self._match_texts(("TO", "AND")) 4102 end = self._parse_bitwise() 4103 expression: t.Optional[exp.Expression] = self.expression( 4104 exp.Tuple, expressions=[start, end] 4105 ) 4106 elif self._match_text_seq("CONTAINED", "IN"): 4107 kind = "CONTAINED IN" 4108 expression = self.expression( 4109 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4110 ) 4111 elif self._match(TokenType.ALL): 4112 kind = "ALL" 4113 expression = None 4114 else: 4115 self._match_text_seq("AS", "OF") 4116 kind = "AS OF" 4117 expression = self._parse_type() 4118 4119 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4120 4121 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4122 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4123 index = self._index 4124 historical_data = None 4125 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4126 this = self._prev.text.upper() 4127 kind = ( 4128 self._match(TokenType.L_PAREN) 4129 and self._match_texts(self.HISTORICAL_DATA_KIND) 4130 and self._prev.text.upper() 4131 ) 4132 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4133 4134 if expression: 4135 self._match_r_paren() 4136 historical_data = self.expression( 4137 exp.HistoricalData, this=this, kind=kind, expression=expression 4138 ) 4139 else: 4140 self._retreat(index) 4141 4142 return historical_data 4143 4144 def _parse_changes(self) -> t.Optional[exp.Changes]: 4145 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4146 return None 4147 4148 information = self._parse_var(any_token=True) 4149 self._match_r_paren() 4150 4151 return self.expression( 4152 exp.Changes, 4153 information=information, 4154 at_before=self._parse_historical_data(), 4155 end=self._parse_historical_data(), 4156 ) 4157 4158 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4159 if not self._match(TokenType.UNNEST): 4160 return None 4161 4162 expressions = self._parse_wrapped_csv(self._parse_equality) 4163 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4164 4165 alias = self._parse_table_alias() if with_alias else None 4166 4167 if alias: 4168 if self.dialect.UNNEST_COLUMN_ONLY: 4169 if alias.args.get("columns"): 4170 self.raise_error("Unexpected extra column alias in unnest.") 4171 4172 alias.set("columns", [alias.this]) 4173 alias.set("this", None) 4174 4175 columns = alias.args.get("columns") or [] 4176 if offset and len(expressions) < len(columns): 4177 offset = columns.pop() 4178 4179 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4180 self._match(TokenType.ALIAS) 4181 offset = self._parse_id_var( 4182 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4183 ) or exp.to_identifier("offset") 4184 4185 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4186 4187 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4188 is_derived = self._match_pair(TokenType.L_PAREN, 
TokenType.VALUES) 4189 if not is_derived and not ( 4190 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4191 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4192 ): 4193 return None 4194 4195 expressions = self._parse_csv(self._parse_value) 4196 alias = self._parse_table_alias() 4197 4198 if is_derived: 4199 self._match_r_paren() 4200 4201 return self.expression( 4202 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4203 ) 4204 4205 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4206 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4207 as_modifier and self._match_text_seq("USING", "SAMPLE") 4208 ): 4209 return None 4210 4211 bucket_numerator = None 4212 bucket_denominator = None 4213 bucket_field = None 4214 percent = None 4215 size = None 4216 seed = None 4217 4218 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4219 matched_l_paren = self._match(TokenType.L_PAREN) 4220 4221 if self.TABLESAMPLE_CSV: 4222 num = None 4223 expressions = self._parse_csv(self._parse_primary) 4224 else: 4225 expressions = None 4226 num = ( 4227 self._parse_factor() 4228 if self._match(TokenType.NUMBER, advance=False) 4229 else self._parse_primary() or self._parse_placeholder() 4230 ) 4231 4232 if self._match_text_seq("BUCKET"): 4233 bucket_numerator = self._parse_number() 4234 self._match_text_seq("OUT", "OF") 4235 bucket_denominator = self._parse_number() 4236 self._match(TokenType.ON) 4237 bucket_field = self._parse_field() 4238 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4239 percent = num 4240 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4241 size = num 4242 else: 4243 percent = num 4244 4245 if matched_l_paren: 4246 self._match_r_paren() 4247 4248 if self._match(TokenType.L_PAREN): 4249 method = self._parse_var(upper=True) 4250 seed = self._match(TokenType.COMMA) and self._parse_number() 4251 self._match_r_paren() 4252 elif self._match_texts(("SEED", "REPEATABLE")): 4253 seed = self._parse_wrapped(self._parse_number) 4254 4255 if not method and self.DEFAULT_SAMPLING_METHOD: 4256 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4257 4258 return self.expression( 4259 exp.TableSample, 4260 expressions=expressions, 4261 method=method, 4262 bucket_numerator=bucket_numerator, 4263 bucket_denominator=bucket_denominator, 4264 bucket_field=bucket_field, 4265 percent=percent, 4266 size=size, 4267 seed=seed, 4268 ) 4269 4270 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4271 return list(iter(self._parse_pivot, None)) or None 4272 4273 def _parse_joins(self) -> t.Iterator[exp.Join]: 4274 return iter(self._parse_join, None) 4275 4276 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4277 if not self._match(TokenType.INTO): 4278 return None 4279 4280 return self.expression( 4281 exp.UnpivotColumns, 4282 this=self._match_text_seq("NAME") and self._parse_column(), 4283 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4284 ) 4285 4286 # https://duckdb.org/docs/sql/statements/pivot 4287 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4288 def _parse_on() -> t.Optional[exp.Expression]: 4289 this = self._parse_bitwise() 4290 4291 if self._match(TokenType.IN): 4292 # PIVOT ... ON col IN (row_val1, row_val2) 4293 return self._parse_in(this) 4294 if self._match(TokenType.ALIAS, advance=False): 4295 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4296 return self._parse_alias(this) 4297 4298 return this 4299 4300 this = self._parse_table() 4301 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4302 into = self._parse_unpivot_columns() 4303 using = self._match(TokenType.USING) and self._parse_csv( 4304 lambda: self._parse_alias(self._parse_function()) 4305 ) 4306 group = self._parse_group() 4307 4308 return self.expression( 4309 exp.Pivot, 4310 this=this, 4311 expressions=expressions, 4312 using=using, 4313 group=group, 4314 unpivot=is_unpivot, 4315 into=into, 4316 ) 4317 4318 def _parse_pivot_in(self) -> exp.In: 4319 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4320 this = self._parse_select_or_expression() 4321 4322 self._match(TokenType.ALIAS) 4323 alias = self._parse_bitwise() 4324 if alias: 4325 if isinstance(alias, exp.Column) and not alias.db: 4326 alias = alias.this 4327 return self.expression(exp.PivotAlias, this=this, alias=alias) 4328 4329 return this 4330 4331 value = self._parse_column() 4332 4333 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4334 self.raise_error("Expecting IN (") 4335 4336 if self._match(TokenType.ANY): 4337 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4338 else: 4339 exprs = self._parse_csv(_parse_aliased_expression) 4340 4341 self._match_r_paren() 4342 return self.expression(exp.In, this=value, expressions=exprs) 4343 4344 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4345 index = self._index 4346 include_nulls = None 4347 4348 if self._match(TokenType.PIVOT): 4349 unpivot = False 4350 elif self._match(TokenType.UNPIVOT): 4351 unpivot = True 4352 4353 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4354 if self._match_text_seq("INCLUDE", "NULLS"): 4355 include_nulls = True 4356 elif self._match_text_seq("EXCLUDE", "NULLS"): 4357 include_nulls = False 4358 else: 4359 return None 4360 4361 expressions = [] 4362 4363 if not self._match(TokenType.L_PAREN): 4364 self._retreat(index) 4365 return None 4366 4367 if unpivot: 4368 expressions = self._parse_csv(self._parse_column) 4369 else: 4370 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4371 4372 if not expressions: 4373 self.raise_error("Failed to parse PIVOT's aggregation list") 4374 4375 if not self._match(TokenType.FOR): 4376 self.raise_error("Expecting FOR") 4377 4378 fields = [] 4379 while True: 4380 field = self._try_parse(self._parse_pivot_in) 4381 if not field: 4382 break 4383 fields.append(field) 4384 4385 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4386 self._parse_bitwise 4387 ) 4388 4389 group = self._parse_group() 4390 4391 self._match_r_paren() 4392 4393 pivot = self.expression( 4394 exp.Pivot, 4395 expressions=expressions, 4396 fields=fields, 4397 unpivot=unpivot, 4398 include_nulls=include_nulls, 4399 default_on_null=default_on_null, 4400 group=group, 4401 ) 4402 4403 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4404 pivot.set("alias", self._parse_table_alias()) 4405 4406 if not unpivot: 4407 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4408 4409 columns: t.List[exp.Expression] = [] 4410 all_fields = [] 4411 for pivot_field in pivot.fields: 4412 pivot_field_expressions = pivot_field.expressions 4413 4414 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
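#
# Hedged sketch: Snowflake's dynamic pivot is one syntax that produces a
# `PivotAny` node (the table/column names below are made up, and support for
# this form may vary by sqlglot version):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sql = "SELECT * FROM t PIVOT(SUM(v) FOR m IN (ANY ORDER BY m))"
#   >>> sqlglot.parse_one(sql, read="snowflake").find(exp.PivotAny) is not None
#   True
#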
4415 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4416 continue 4417 4418 all_fields.append( 4419 [ 4420 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4421 for fld in pivot_field_expressions 4422 ] 4423 ) 4424 4425 if all_fields: 4426 if names: 4427 all_fields.append(names) 4428 4429 # Generate all possible combinations of the pivot columns 4430 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4431 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4432 for fld_parts_tuple in itertools.product(*all_fields): 4433 fld_parts = list(fld_parts_tuple) 4434 4435 if names and self.PREFIXED_PIVOT_COLUMNS: 4436 # Move the "name" to the front of the list 4437 fld_parts.insert(0, fld_parts.pop(-1)) 4438 4439 columns.append(exp.to_identifier("_".join(fld_parts))) 4440 4441 pivot.set("columns", columns) 4442 4443 return pivot 4444 4445 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4446 return [agg.alias for agg in aggregations if agg.alias] 4447 4448 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4449 if not skip_where_token and not self._match(TokenType.PREWHERE): 4450 return None 4451 4452 return self.expression( 4453 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4454 ) 4455 4456 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4457 if not skip_where_token and not self._match(TokenType.WHERE): 4458 return None 4459 4460 return self.expression( 4461 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4462 ) 4463 4464 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4465 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4466 return None 4467 4468 elements: t.Dict[str, t.Any] = defaultdict(list) 4469 4470 if self._match(TokenType.ALL): 4471 elements["all"] = True 4472 elif self._match(TokenType.DISTINCT): 4473 elements["all"] = False 4474 4475 while True: 4476 index = self._index 4477 4478 elements["expressions"].extend( 4479 self._parse_csv( 4480 lambda: None 4481 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4482 else self._parse_assignment() 4483 ) 4484 ) 4485 4486 before_with_index = self._index 4487 with_prefix = self._match(TokenType.WITH) 4488 4489 if self._match(TokenType.ROLLUP): 4490 elements["rollup"].append( 4491 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4492 ) 4493 elif self._match(TokenType.CUBE): 4494 elements["cube"].append( 4495 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4496 ) 4497 elif self._match(TokenType.GROUPING_SETS): 4498 elements["grouping_sets"].append( 4499 self.expression( 4500 exp.GroupingSets, 4501 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4502 ) 4503 ) 4504 elif self._match_text_seq("TOTALS"): 4505 elements["totals"] = True # type: ignore 4506 4507 if before_with_index <= self._index <= before_with_index + 1: 4508 self._retreat(before_with_index) 4509 break 4510 4511 if index == self._index: 4512 break 4513 4514 return self.expression(exp.Group, **elements) # type: ignore 4515 4516 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4517 return self.expression( 4518 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4519 ) 4520 4521 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4522 if 
self._match(TokenType.L_PAREN): 4523 grouping_set = self._parse_csv(self._parse_column) 4524 self._match_r_paren() 4525 return self.expression(exp.Tuple, expressions=grouping_set) 4526 4527 return self._parse_column() 4528 4529 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4530 if not skip_having_token and not self._match(TokenType.HAVING): 4531 return None 4532 return self.expression(exp.Having, this=self._parse_assignment()) 4533 4534 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4535 if not self._match(TokenType.QUALIFY): 4536 return None 4537 return self.expression(exp.Qualify, this=self._parse_assignment()) 4538 4539 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4540 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4541 exp.Prior, this=self._parse_bitwise() 4542 ) 4543 connect = self._parse_assignment() 4544 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4545 return connect 4546 4547 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4548 if skip_start_token: 4549 start = None 4550 elif self._match(TokenType.START_WITH): 4551 start = self._parse_assignment() 4552 else: 4553 return None 4554 4555 self._match(TokenType.CONNECT_BY) 4556 nocycle = self._match_text_seq("NOCYCLE") 4557 connect = self._parse_connect_with_prior() 4558 4559 if not start and self._match(TokenType.START_WITH): 4560 start = self._parse_assignment() 4561 4562 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4563 4564 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4565 this = self._parse_id_var(any_token=True) 4566 if self._match(TokenType.ALIAS): 4567 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4568 return this 4569 4570 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4571 if self._match_text_seq("INTERPOLATE"): 4572 return self._parse_wrapped_csv(self._parse_name_as_expression) 4573 return None 4574 4575 def _parse_order( 4576 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4577 ) -> t.Optional[exp.Expression]: 4578 siblings = None 4579 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4580 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4581 return this 4582 4583 siblings = True 4584 4585 return self.expression( 4586 exp.Order, 4587 this=this, 4588 expressions=self._parse_csv(self._parse_ordered), 4589 siblings=siblings, 4590 ) 4591 4592 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4593 if not self._match(token): 4594 return None 4595 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4596 4597 def _parse_ordered( 4598 self, parse_method: t.Optional[t.Callable] = None 4599 ) -> t.Optional[exp.Ordered]: 4600 this = parse_method() if parse_method else self._parse_assignment() 4601 if not this: 4602 return None 4603 4604 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4605 this = exp.var("ALL") 4606 4607 asc = self._match(TokenType.ASC) 4608 desc = self._match(TokenType.DESC) or (asc and False) 4609 4610 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4611 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4612 4613 nulls_first = is_nulls_first or False 4614 explicitly_null_ordered = is_nulls_first or is_nulls_last 4615 4616 if ( 4617 not explicitly_null_ordered 4618 and ( 4619 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4620 or 
(desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4621 ) 4622 and self.dialect.NULL_ORDERING != "nulls_are_last" 4623 ): 4624 nulls_first = True 4625 4626 if self._match_text_seq("WITH", "FILL"): 4627 with_fill = self.expression( 4628 exp.WithFill, 4629 **{ # type: ignore 4630 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4631 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4632 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4633 "interpolate": self._parse_interpolate(), 4634 }, 4635 ) 4636 else: 4637 with_fill = None 4638 4639 return self.expression( 4640 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4641 ) 4642 4643 def _parse_limit_options(self) -> exp.LimitOptions: 4644 percent = self._match(TokenType.PERCENT) 4645 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4646 self._match_text_seq("ONLY") 4647 with_ties = self._match_text_seq("WITH", "TIES") 4648 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4649 4650 def _parse_limit( 4651 self, 4652 this: t.Optional[exp.Expression] = None, 4653 top: bool = False, 4654 skip_limit_token: bool = False, 4655 ) -> t.Optional[exp.Expression]: 4656 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4657 comments = self._prev_comments 4658 if top: 4659 limit_paren = self._match(TokenType.L_PAREN) 4660 expression = self._parse_term() if limit_paren else self._parse_number() 4661 4662 if limit_paren: 4663 self._match_r_paren() 4664 4665 limit_options = self._parse_limit_options() 4666 else: 4667 limit_options = None 4668 expression = self._parse_term() 4669 4670 if self._match(TokenType.COMMA): 4671 offset = expression 4672 expression = self._parse_term() 4673 else: 4674 offset = None 4675 4676 limit_exp = self.expression( 4677 exp.Limit, 4678 this=this, 4679 expression=expression, 4680 offset=offset, 4681 comments=comments, 4682 limit_options=limit_options, 4683 expressions=self._parse_limit_by(), 4684 ) 4685 4686 return limit_exp 4687 4688 if self._match(TokenType.FETCH): 4689 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4690 direction = self._prev.text.upper() if direction else "FIRST" 4691 4692 count = self._parse_field(tokens=self.FETCH_TOKENS) 4693 4694 return self.expression( 4695 exp.Fetch, 4696 direction=direction, 4697 count=count, 4698 limit_options=self._parse_limit_options(), 4699 ) 4700 4701 return this 4702 4703 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4704 if not self._match(TokenType.OFFSET): 4705 return this 4706 4707 count = self._parse_term() 4708 self._match_set((TokenType.ROW, TokenType.ROWS)) 4709 4710 return self.expression( 4711 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4712 ) 4713 4714 def _can_parse_limit_or_offset(self) -> bool: 4715 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4716 return False 4717 4718 index = self._index 4719 result = bool( 4720 self._try_parse(self._parse_limit, retreat=True) 4721 or self._try_parse(self._parse_offset, retreat=True) 4722 ) 4723 self._retreat(index) 4724 return result 4725 4726 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4727 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4728 4729 def _parse_locks(self) -> t.List[exp.Lock]: 4730 locks = [] 4731 while True: 4732 if self._match_text_seq("FOR", "UPDATE"): 4733 update = True 4734 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4735 "LOCK", "IN", "SHARE", "MODE" 4736 ): 4737 update = False 4738 else: 4739 break 4740 4741 expressions = None 4742 if self._match_text_seq("OF"): 4743 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4744 4745 wait: t.Optional[bool | exp.Expression] = None 4746 if self._match_text_seq("NOWAIT"): 4747 wait = True 4748 elif self._match_text_seq("WAIT"): 4749 wait = self._parse_primary() 4750 elif self._match_text_seq("SKIP", "LOCKED"): 4751 wait = False 4752 4753 locks.append( 4754 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4755 ) 4756 4757 return locks 4758 4759 def parse_set_operation( 4760 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4761 ) -> t.Optional[exp.Expression]: 4762 start = self._index 4763 _, side_token, kind_token = self._parse_join_parts() 4764 4765 side = side_token.text if side_token else None 4766 kind = kind_token.text if kind_token else None 4767 4768 if not self._match_set(self.SET_OPERATIONS): 4769 self._retreat(start) 4770 return None 4771 4772 token_type = self._prev.token_type 4773 4774 if token_type == TokenType.UNION: 4775 operation: t.Type[exp.SetOperation] = exp.Union 4776 elif token_type == TokenType.EXCEPT: 4777 operation = exp.Except 4778 else: 4779 operation = exp.Intersect 4780 4781 comments = self._prev.comments 4782 4783 if self._match(TokenType.DISTINCT): 4784 distinct: t.Optional[bool] = True 4785 elif self._match(TokenType.ALL): 4786 distinct = False 4787 else: 4788 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4789 if distinct is None: 4790 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4791 4792 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4793 "STRICT", "CORRESPONDING" 4794 ) 4795 if self._match_text_seq("CORRESPONDING"): 4796 by_name = True 4797 if not side and not kind: 4798 kind = "INNER" 4799 4800 on_column_list = None 4801 if by_name and self._match_texts(("ON", "BY")): 4802 on_column_list = self._parse_wrapped_csv(self._parse_column) 4803 4804 expression = self._parse_select( 4805 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4806 ) 4807 4808 return self.expression( 4809 operation, 4810 comments=comments, 4811 this=this, 4812 distinct=distinct, 4813 by_name=by_name, 4814 expression=expression, 4815 side=side, 4816 kind=kind, 4817 on=on_column_list, 4818 ) 4819 4820 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4821 while this: 4822 setop = self.parse_set_operation(this) 4823 if not setop: 4824 break 4825 this = setop 4826 4827 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4828 expression = this.expression 4829 4830 if expression: 4831 for arg in self.SET_OP_MODIFIERS: 4832 expr = expression.args.get(arg) 4833 if expr: 4834 this.set(arg, expr.pop()) 4835 4836 return this 4837 4838 def _parse_expression(self) -> t.Optional[exp.Expression]: 4839 return self._parse_alias(self._parse_assignment()) 4840 4841 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4842 this = self._parse_disjunction() 4843 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4844 # This allows us to parse <non-identifier token> := <expr> 4845 this = exp.column( 4846 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4847 ) 4848 4849 while self._match_set(self.ASSIGNMENT): 4850 if isinstance(this, exp.Column) and 
len(this.parts) == 1: 4851 this = this.this 4852 4853 this = self.expression( 4854 self.ASSIGNMENT[self._prev.token_type], 4855 this=this, 4856 comments=self._prev_comments, 4857 expression=self._parse_assignment(), 4858 ) 4859 4860 return this 4861 4862 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4863 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4864 4865 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4866 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4867 4868 def _parse_equality(self) -> t.Optional[exp.Expression]: 4869 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4870 4871 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4872 return self._parse_tokens(self._parse_range, self.COMPARISON) 4873 4874 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4875 this = this or self._parse_bitwise() 4876 negate = self._match(TokenType.NOT) 4877 4878 if self._match_set(self.RANGE_PARSERS): 4879 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4880 if not expression: 4881 return this 4882 4883 this = expression 4884 elif self._match(TokenType.ISNULL): 4885 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4886 4887 # Postgres supports ISNULL and NOTNULL for conditions. 4888 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4889 if self._match(TokenType.NOTNULL): 4890 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4891 this = self.expression(exp.Not, this=this) 4892 4893 if negate: 4894 this = self._negate_range(this) 4895 4896 if self._match(TokenType.IS): 4897 this = self._parse_is(this) 4898 4899 return this 4900 4901 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4902 if not this: 4903 return this 4904 4905 return self.expression(exp.Not, this=this) 4906 4907 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4908 index = self._index - 1 4909 negate = self._match(TokenType.NOT) 4910 4911 if self._match_text_seq("DISTINCT", "FROM"): 4912 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4913 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4914 4915 if self._match(TokenType.JSON): 4916 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4917 4918 if self._match_text_seq("WITH"): 4919 _with = True 4920 elif self._match_text_seq("WITHOUT"): 4921 _with = False 4922 else: 4923 _with = None 4924 4925 unique = self._match(TokenType.UNIQUE) 4926 self._match_text_seq("KEYS") 4927 expression: t.Optional[exp.Expression] = self.expression( 4928 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4929 ) 4930 else: 4931 expression = self._parse_primary() or self._parse_null() 4932 if not expression: 4933 self._retreat(index) 4934 return None 4935 4936 this = self.expression(exp.Is, this=this, expression=expression) 4937 return self.expression(exp.Not, this=this) if negate else this 4938 4939 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4940 unnest = self._parse_unnest(with_alias=False) 4941 if unnest: 4942 this = self.expression(exp.In, this=this, unnest=unnest) 4943 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4944 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4945 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4946 4947 if len(expressions) == 
1 and isinstance(expressions[0], exp.Query): 4948 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4949 else: 4950 this = self.expression(exp.In, this=this, expressions=expressions) 4951 4952 if matched_l_paren: 4953 self._match_r_paren(this) 4954 elif not self._match(TokenType.R_BRACKET, expression=this): 4955 self.raise_error("Expecting ]") 4956 else: 4957 this = self.expression(exp.In, this=this, field=self._parse_column()) 4958 4959 return this 4960 4961 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4962 low = self._parse_bitwise() 4963 self._match(TokenType.AND) 4964 high = self._parse_bitwise() 4965 return self.expression(exp.Between, this=this, low=low, high=high) 4966 4967 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4968 if not self._match(TokenType.ESCAPE): 4969 return this 4970 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4971 4972 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4973 index = self._index 4974 4975 if not self._match(TokenType.INTERVAL) and match_interval: 4976 return None 4977 4978 if self._match(TokenType.STRING, advance=False): 4979 this = self._parse_primary() 4980 else: 4981 this = self._parse_term() 4982 4983 if not this or ( 4984 isinstance(this, exp.Column) 4985 and not this.table 4986 and not this.this.quoted 4987 and this.name.upper() == "IS" 4988 ): 4989 self._retreat(index) 4990 return None 4991 4992 unit = self._parse_function() or ( 4993 not self._match(TokenType.ALIAS, advance=False) 4994 and self._parse_var(any_token=True, upper=True) 4995 ) 4996 4997 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4998 # each INTERVAL expression into this canonical form so it's easy to transpile 4999 if this and this.is_number: 5000 this = exp.Literal.string(this.to_py()) 5001 elif this and this.is_string: 5002 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5003 if parts and unit: 5004 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5005 unit = None 5006 self._retreat(self._index - 1) 5007 5008 if len(parts) == 1: 5009 this = exp.Literal.string(parts[0][0]) 5010 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5011 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5012 unit = self.expression( 5013 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5014 ) 5015 5016 interval = self.expression(exp.Interval, this=this, unit=unit) 5017 5018 index = self._index 5019 self._match(TokenType.PLUS) 5020 5021 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5022 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5023 return self.expression( 5024 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5025 ) 5026 5027 self._retreat(index) 5028 return interval 5029 5030 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5031 this = self._parse_term() 5032 5033 while True: 5034 if self._match_set(self.BITWISE): 5035 this = self.expression( 5036 self.BITWISE[self._prev.token_type], 5037 this=this, 5038 expression=self._parse_term(), 5039 ) 5040 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5041 this = self.expression( 5042 exp.DPipe, 5043 this=this, 5044 expression=self._parse_term(), 5045 safe=not self.dialect.STRICT_STRING_CONCAT, 5046 ) 5047 elif self._match(TokenType.DQMARK): 5048 this = self.expression( 5049 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5050 ) 5051 elif self._match_pair(TokenType.LT, TokenType.LT): 5052 this = self.expression( 5053 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5054 ) 5055 elif self._match_pair(TokenType.GT, TokenType.GT): 5056 this = self.expression( 5057 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5058 ) 5059 else: 5060 break 5061 5062 return this 5063 5064 def _parse_term(self) -> t.Optional[exp.Expression]: 5065 this = self._parse_factor() 5066 5067 while self._match_set(self.TERM): 5068 klass = self.TERM[self._prev.token_type] 5069 comments = self._prev_comments 5070 expression = self._parse_factor() 5071 5072 this = self.expression(klass, this=this, comments=comments, expression=expression) 5073 5074 if isinstance(this, exp.Collate): 5075 expr = this.expression 5076 5077 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5078 # fallback to Identifier / Var 5079 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5080 ident = expr.this 5081 if isinstance(ident, exp.Identifier): 5082 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5083 5084 return this 5085 5086 def _parse_factor(self) -> t.Optional[exp.Expression]: 5087 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5088 this = parse_method() 5089 5090 while self._match_set(self.FACTOR): 5091 klass = self.FACTOR[self._prev.token_type] 5092 comments = self._prev_comments 5093 expression = parse_method() 5094 5095 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5096 self._retreat(self._index - 1) 5097 return this 5098 5099 this = self.expression(klass, this=this, comments=comments, expression=expression) 5100 5101 if isinstance(this, exp.Div): 5102 this.args["typed"] = self.dialect.TYPED_DIVISION 5103 this.args["safe"] = self.dialect.SAFE_DIVISION 5104 5105 return this 5106 5107 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5108 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5109 5110 def _parse_unary(self) -> t.Optional[exp.Expression]: 5111 if self._match_set(self.UNARY_PARSERS): 5112 return self.UNARY_PARSERS[self._prev.token_type](self) 5113 return self._parse_at_time_zone(self._parse_type()) 5114 5115 def _parse_type( 5116 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5117 ) -> t.Optional[exp.Expression]: 5118 interval = parse_interval and self._parse_interval() 5119 if interval: 5120 return interval 5121 5122 index = self._index 5123 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5124 
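#
# A minimal, hedged illustration of the INTERVAL canonicalization performed by
# _parse_interval above (the exact rendered output is indicative of the default
# dialect and may differ between versions):
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT INTERVAL '5 day'").sql()
#   "SELECT INTERVAL '5' DAY"
#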
5125 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5126 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5127 if isinstance(data_type, exp.Cast): 5128 # This constructor can contain ops directly after it, for instance struct unnesting: 5129 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5130 return self._parse_column_ops(data_type) 5131 5132 if data_type: 5133 index2 = self._index 5134 this = self._parse_primary() 5135 5136 if isinstance(this, exp.Literal): 5137 literal = this.name 5138 this = self._parse_column_ops(this) 5139 5140 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5141 if parser: 5142 return parser(self, this, data_type) 5143 5144 if ( 5145 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5146 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5147 and TIME_ZONE_RE.search(literal) 5148 ): 5149 data_type = exp.DataType.build("TIMESTAMPTZ") 5150 5151 return self.expression(exp.Cast, this=this, to=data_type) 5152 5153 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5154 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5155 # 5156 # If the index difference here is greater than 1, that means the parser itself must have 5157 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5158 # 5159 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5160 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5161 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5162 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5163 # 5164 # In these cases, we don't really want to return the converted type, but instead retreat 5165 # and try to parse a Column or Identifier in the section below. 
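#
# A hedged sketch of the Snowflake DECIMAL conversion described above (the
# rendered default precision/scale is indicative):
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT CAST(x AS DECIMAL)", read="snowflake")[0]
#   'SELECT CAST(x AS DECIMAL(38, 0))'
#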
5166 if data_type.expressions and index2 - index > 1: 5167 self._retreat(index2) 5168 return self._parse_column_ops(data_type) 5169 5170 self._retreat(index) 5171 5172 if fallback_to_identifier: 5173 return self._parse_id_var() 5174 5175 this = self._parse_column() 5176 return this and self._parse_column_ops(this) 5177 5178 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5179 this = self._parse_type() 5180 if not this: 5181 return None 5182 5183 if isinstance(this, exp.Column) and not this.table: 5184 this = exp.var(this.name.upper()) 5185 5186 return self.expression( 5187 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5188 ) 5189 5190 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5191 type_name = identifier.name 5192 5193 while self._match(TokenType.DOT): 5194 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5195 5196 return exp.DataType.build(type_name, udt=True) 5197 5198 def _parse_types( 5199 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5200 ) -> t.Optional[exp.Expression]: 5201 index = self._index 5202 5203 this: t.Optional[exp.Expression] = None 5204 prefix = self._match_text_seq("SYSUDTLIB", ".") 5205 5206 if not self._match_set(self.TYPE_TOKENS): 5207 identifier = allow_identifiers and self._parse_id_var( 5208 any_token=False, tokens=(TokenType.VAR,) 5209 ) 5210 if isinstance(identifier, exp.Identifier): 5211 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5212 5213 if len(tokens) != 1: 5214 self.raise_error("Unexpected identifier", self._prev) 5215 5216 if tokens[0].token_type in self.TYPE_TOKENS: 5217 self._prev = tokens[0] 5218 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5219 this = self._parse_user_defined_type(identifier) 5220 else: 5221 self._retreat(self._index - 1) 5222 return None 5223 else: 5224 return None 5225 5226 type_token = self._prev.token_type 5227 5228 if type_token == TokenType.PSEUDO_TYPE: 5229 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5230 5231 if type_token == TokenType.OBJECT_IDENTIFIER: 5232 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5233 5234 # https://materialize.com/docs/sql/types/map/ 5235 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5236 key_type = self._parse_types( 5237 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5238 ) 5239 if not self._match(TokenType.FARROW): 5240 self._retreat(index) 5241 return None 5242 5243 value_type = self._parse_types( 5244 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5245 ) 5246 if not self._match(TokenType.R_BRACKET): 5247 self._retreat(index) 5248 return None 5249 5250 return exp.DataType( 5251 this=exp.DataType.Type.MAP, 5252 expressions=[key_type, value_type], 5253 nested=True, 5254 prefix=prefix, 5255 ) 5256 5257 nested = type_token in self.NESTED_TYPE_TOKENS 5258 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5259 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5260 expressions = None 5261 maybe_func = False 5262 5263 if self._match(TokenType.L_PAREN): 5264 if is_struct: 5265 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5266 elif nested: 5267 expressions = self._parse_csv( 5268 lambda: self._parse_types( 5269 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5270 ) 5271 ) 5272 if type_token == TokenType.NULLABLE and len(expressions) == 
1: 5273 this = expressions[0] 5274 this.set("nullable", True) 5275 self._match_r_paren() 5276 return this 5277 elif type_token in self.ENUM_TYPE_TOKENS: 5278 expressions = self._parse_csv(self._parse_equality) 5279 elif is_aggregate: 5280 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5281 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5282 ) 5283 if not func_or_ident: 5284 return None 5285 expressions = [func_or_ident] 5286 if self._match(TokenType.COMMA): 5287 expressions.extend( 5288 self._parse_csv( 5289 lambda: self._parse_types( 5290 check_func=check_func, 5291 schema=schema, 5292 allow_identifiers=allow_identifiers, 5293 ) 5294 ) 5295 ) 5296 else: 5297 expressions = self._parse_csv(self._parse_type_size) 5298 5299 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5300 if type_token == TokenType.VECTOR and len(expressions) == 2: 5301 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5302 5303 if not expressions or not self._match(TokenType.R_PAREN): 5304 self._retreat(index) 5305 return None 5306 5307 maybe_func = True 5308 5309 values: t.Optional[t.List[exp.Expression]] = None 5310 5311 if nested and self._match(TokenType.LT): 5312 if is_struct: 5313 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5314 else: 5315 expressions = self._parse_csv( 5316 lambda: self._parse_types( 5317 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5318 ) 5319 ) 5320 5321 if not self._match(TokenType.GT): 5322 self.raise_error("Expecting >") 5323 5324 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5325 values = self._parse_csv(self._parse_assignment) 5326 if not values and is_struct: 5327 values = None 5328 self._retreat(self._index - 1) 5329 else: 5330 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5331 5332 if type_token in self.TIMESTAMPS: 5333 if self._match_text_seq("WITH", "TIME", "ZONE"): 5334 maybe_func = False 5335 tz_type = ( 5336 exp.DataType.Type.TIMETZ 5337 if type_token in self.TIMES 5338 else exp.DataType.Type.TIMESTAMPTZ 5339 ) 5340 this = exp.DataType(this=tz_type, expressions=expressions) 5341 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5342 maybe_func = False 5343 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5344 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5345 maybe_func = False 5346 elif type_token == TokenType.INTERVAL: 5347 unit = self._parse_var(upper=True) 5348 if unit: 5349 if self._match_text_seq("TO"): 5350 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5351 5352 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5353 else: 5354 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5355 elif type_token == TokenType.VOID: 5356 this = exp.DataType(this=exp.DataType.Type.NULL) 5357 5358 if maybe_func and check_func: 5359 index2 = self._index 5360 peek = self._parse_string() 5361 5362 if not peek: 5363 self._retreat(index) 5364 return None 5365 5366 self._retreat(index2) 5367 5368 if not this: 5369 if self._match_text_seq("UNSIGNED"): 5370 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5371 if not unsigned_type_token: 5372 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5373 5374 type_token = unsigned_type_token or type_token 5375 5376 this = exp.DataType( 5377 this=exp.DataType.Type[type_token.value], 5378 expressions=expressions, 
5379 nested=nested, 5380 prefix=prefix, 5381 ) 5382 5383 # Empty arrays/structs are allowed 5384 if values is not None: 5385 cls = exp.Struct if is_struct else exp.Array 5386 this = exp.cast(cls(expressions=values), this, copy=False) 5387 5388 elif expressions: 5389 this.set("expressions", expressions) 5390 5391 # https://materialize.com/docs/sql/types/list/#type-name 5392 while self._match(TokenType.LIST): 5393 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5394 5395 index = self._index 5396 5397 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5398 matched_array = self._match(TokenType.ARRAY) 5399 5400 while self._curr: 5401 datatype_token = self._prev.token_type 5402 matched_l_bracket = self._match(TokenType.L_BRACKET) 5403 5404 if (not matched_l_bracket and not matched_array) or ( 5405 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5406 ): 5407 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5408 # not to be confused with the fixed size array parsing 5409 break 5410 5411 matched_array = False 5412 values = self._parse_csv(self._parse_assignment) or None 5413 if ( 5414 values 5415 and not schema 5416 and ( 5417 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5418 ) 5419 ): 5420 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5421 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5422 self._retreat(index) 5423 break 5424 5425 this = exp.DataType( 5426 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5427 ) 5428 self._match(TokenType.R_BRACKET) 5429 5430 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5431 converter = self.TYPE_CONVERTERS.get(this.this) 5432 if converter: 5433 this = converter(t.cast(exp.DataType, this)) 5434 5435 return this 5436 5437 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5438 index = self._index 5439 5440 if ( 5441 self._curr 5442 and self._next 5443 and self._curr.token_type in self.TYPE_TOKENS 5444 and self._next.token_type in self.TYPE_TOKENS 5445 ): 5446 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5447 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5448 this = self._parse_id_var() 5449 else: 5450 this = ( 5451 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5452 or self._parse_id_var() 5453 ) 5454 5455 self._match(TokenType.COLON) 5456 5457 if ( 5458 type_required 5459 and not isinstance(this, exp.DataType) 5460 and not self._match_set(self.TYPE_TOKENS, advance=False) 5461 ): 5462 self._retreat(index) 5463 return self._parse_types() 5464 5465 return self._parse_column_def(this) 5466 5467 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5468 if not self._match_text_seq("AT", "TIME", "ZONE"): 5469 return this 5470 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5471 5472 def _parse_column(self) -> t.Optional[exp.Expression]: 5473 this = self._parse_column_reference() 5474 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5475 5476 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5477 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5478 5479 return column 5480 5481 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5482 this = self._parse_field() 5483 if ( 5484 not this 5485 and self._match(TokenType.VALUES, advance=False) 5486 and self.VALUES_FOLLOWED_BY_PAREN 5487 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5488 ): 5489 this = self._parse_id_var() 5490 5491 if isinstance(this, exp.Identifier): 5492 # We bubble up comments from the Identifier to the Column 5493 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5494 5495 return this 5496 5497 def _parse_colon_as_variant_extract( 5498 self, this: t.Optional[exp.Expression] 5499 ) -> t.Optional[exp.Expression]: 5500 casts = [] 5501 json_path = [] 5502 escape = None 5503 5504 while self._match(TokenType.COLON): 5505 start_index = self._index 5506 5507 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5508 path = self._parse_column_ops( 5509 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5510 ) 5511 5512 # The cast :: operator has a lower precedence than the extraction operator :, so 5513 # we rearrange the AST appropriately to avoid casting the JSON path 5514 while isinstance(path, exp.Cast): 5515 casts.append(path.to) 5516 path = path.this 5517 5518 if casts: 5519 dcolon_offset = next( 5520 i 5521 for i, t in enumerate(self._tokens[start_index:]) 5522 if t.token_type == TokenType.DCOLON 5523 ) 5524 end_token = self._tokens[start_index + dcolon_offset - 1] 5525 else: 5526 end_token = self._prev 5527 5528 if path: 5529 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5530 # it'll roundtrip to a string literal in GET_PATH 5531 if isinstance(path, exp.Identifier) and path.quoted: 5532 escape = True 5533 5534 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5535 5536 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5537 # Databricks transforms it back to the colon/dot notation 5538 if json_path: 5539 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5540 5541 if json_path_expr: 5542 json_path_expr.set("escape", escape) 5543 5544 this = self.expression( 5545 exp.JSONExtract, 5546 this=this, 5547 expression=json_path_expr, 5548 variant_extract=True, 5549 ) 5550 5551 while casts: 5552 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5553 5554 return this 5555 5556 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5557 return self._parse_types() 5558 5559 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5560 this = self._parse_bracket(this) 5561 5562 while self._match_set(self.COLUMN_OPERATORS): 5563 op_token = self._prev.token_type 5564 op = self.COLUMN_OPERATORS.get(op_token) 5565 5566 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5567 field = self._parse_dcolon() 5568 if not field: 5569 self.raise_error("Expected type") 5570 elif op and self._curr: 5571 field = self._parse_column_reference() or self._parse_bracket() 5572 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5573 field = self._parse_column_ops(field) 5574 else: 5575 field = self._parse_field(any_token=True, anonymous_func=True) 5576 5577 # Function calls can be qualified, e.g., x.y.FOO() 5578 # This converts the final AST to a series of Dots leading to the function call 5579 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5580 if isinstance(field, (exp.Func, exp.Window)) and this: 5581 this = this.transform( 5582 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5583 ) 5584 5585 if op: 5586 this = op(self, this, field) 5587 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5588 this = self.expression( 5589 exp.Column, 5590 comments=this.comments, 5591 this=field, 5592 table=this.this, 5593 db=this.args.get("table"), 5594 catalog=this.args.get("db"), 5595 ) 5596 elif isinstance(field, exp.Window): 5597 # Move the exp.Dot's to the window's function 5598 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5599 field.set("this", window_func) 5600 this = field 5601 else: 5602 this = self.expression(exp.Dot, this=this, expression=field) 5603 5604 if field and field.comments: 5605 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5606 5607 this = self._parse_bracket(this) 5608 5609 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5610 5611 def _parse_paren(self) -> t.Optional[exp.Expression]: 5612 if not self._match(TokenType.L_PAREN): 5613 return None 5614 5615 comments = self._prev_comments 5616 query = self._parse_select() 5617 5618 if query: 5619 expressions = [query] 5620 else: 5621 expressions = self._parse_expressions() 5622 5623 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5624 5625 if not this and self._match(TokenType.R_PAREN, advance=False): 5626 this = self.expression(exp.Tuple) 5627 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5628 this = 
self._parse_subquery(this=this, parse_alias=False) 5629 elif isinstance(this, exp.Subquery): 5630 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5631 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5632 this = self.expression(exp.Tuple, expressions=expressions) 5633 else: 5634 this = self.expression(exp.Paren, this=this) 5635 5636 if this: 5637 this.add_comments(comments) 5638 5639 self._match_r_paren(expression=this) 5640 return this 5641 5642 def _parse_primary(self) -> t.Optional[exp.Expression]: 5643 if self._match_set(self.PRIMARY_PARSERS): 5644 token_type = self._prev.token_type 5645 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5646 5647 if token_type == TokenType.STRING: 5648 expressions = [primary] 5649 while self._match(TokenType.STRING): 5650 expressions.append(exp.Literal.string(self._prev.text)) 5651 5652 if len(expressions) > 1: 5653 return self.expression(exp.Concat, expressions=expressions) 5654 5655 return primary 5656 5657 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5658 return exp.Literal.number(f"0.{self._prev.text}") 5659 5660 return self._parse_paren() 5661 5662 def _parse_field( 5663 self, 5664 any_token: bool = False, 5665 tokens: t.Optional[t.Collection[TokenType]] = None, 5666 anonymous_func: bool = False, 5667 ) -> t.Optional[exp.Expression]: 5668 if anonymous_func: 5669 field = ( 5670 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5671 or self._parse_primary() 5672 ) 5673 else: 5674 field = self._parse_primary() or self._parse_function( 5675 anonymous=anonymous_func, any_token=any_token 5676 ) 5677 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5678 5679 def _parse_function( 5680 self, 5681 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5682 anonymous: bool = False, 5683 optional_parens: bool = True, 5684 any_token: bool = False, 5685 ) -> t.Optional[exp.Expression]: 5686 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5687 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5688 fn_syntax = False 5689 if ( 5690 self._match(TokenType.L_BRACE, advance=False) 5691 and self._next 5692 and self._next.text.upper() == "FN" 5693 ): 5694 self._advance(2) 5695 fn_syntax = True 5696 5697 func = self._parse_function_call( 5698 functions=functions, 5699 anonymous=anonymous, 5700 optional_parens=optional_parens, 5701 any_token=any_token, 5702 ) 5703 5704 if fn_syntax: 5705 self._match(TokenType.R_BRACE) 5706 5707 return func 5708 5709 def _parse_function_call( 5710 self, 5711 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5712 anonymous: bool = False, 5713 optional_parens: bool = True, 5714 any_token: bool = False, 5715 ) -> t.Optional[exp.Expression]: 5716 if not self._curr: 5717 return None 5718 5719 comments = self._curr.comments 5720 token = self._curr 5721 token_type = self._curr.token_type 5722 this = self._curr.text 5723 upper = this.upper() 5724 5725 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5726 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5727 self._advance() 5728 return self._parse_window(parser(self)) 5729 5730 if not self._next or self._next.token_type != TokenType.L_PAREN: 5731 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5732 self._advance() 5733 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5734 5735 return None 5736 5737 if any_token: 5738 if token_type in self.RESERVED_TOKENS: 
5739 return None 5740 elif token_type not in self.FUNC_TOKENS: 5741 return None 5742 5743 self._advance(2) 5744 5745 parser = self.FUNCTION_PARSERS.get(upper) 5746 if parser and not anonymous: 5747 this = parser(self) 5748 else: 5749 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5750 5751 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5752 this = self.expression( 5753 subquery_predicate, comments=comments, this=self._parse_select() 5754 ) 5755 self._match_r_paren() 5756 return this 5757 5758 if functions is None: 5759 functions = self.FUNCTIONS 5760 5761 function = functions.get(upper) 5762 known_function = function and not anonymous 5763 5764 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5765 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5766 5767 post_func_comments = self._curr and self._curr.comments 5768 if known_function and post_func_comments: 5769 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5770 # call we'll construct it as exp.Anonymous, even if it's "known" 5771 if any( 5772 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5773 for comment in post_func_comments 5774 ): 5775 known_function = False 5776 5777 if alias and known_function: 5778 args = self._kv_to_prop_eq(args) 5779 5780 if known_function: 5781 func_builder = t.cast(t.Callable, function) 5782 5783 if "dialect" in func_builder.__code__.co_varnames: 5784 func = func_builder(args, dialect=self.dialect) 5785 else: 5786 func = func_builder(args) 5787 5788 func = self.validate_expression(func, args) 5789 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5790 func.meta["name"] = this 5791 5792 this = func 5793 else: 5794 if token_type == TokenType.IDENTIFIER: 5795 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5796 5797 this = self.expression(exp.Anonymous, this=this, expressions=args) 5798 this = this.update_positions(token) 5799 5800 if isinstance(this, exp.Expression): 5801 this.add_comments(comments) 5802 5803 self._match_r_paren(this) 5804 return self._parse_window(this) 5805 5806 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5807 return expression 5808 5809 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5810 transformed = [] 5811 5812 for index, e in enumerate(expressions): 5813 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5814 if isinstance(e, exp.Alias): 5815 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5816 5817 if not isinstance(e, exp.PropertyEQ): 5818 e = self.expression( 5819 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5820 ) 5821 5822 if isinstance(e.this, exp.Column): 5823 e.this.replace(e.this.this) 5824 else: 5825 e = self._to_prop_eq(e, index) 5826 5827 transformed.append(e) 5828 5829 return transformed 5830 5831 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5832 return self._parse_statement() 5833 5834 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5835 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5836 5837 def _parse_user_defined_function( 5838 self, kind: t.Optional[TokenType] = None 5839 ) -> t.Optional[exp.Expression]: 5840 this = self._parse_table_parts(schema=True) 5841 5842 if not self._match(TokenType.L_PAREN): 5843 return this 5844 5845 expressions = self._parse_csv(self._parse_function_parameter) 5846 
self._match_r_paren() 5847 return self.expression( 5848 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5849 ) 5850 5851 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5852 literal = self._parse_primary() 5853 if literal: 5854 return self.expression(exp.Introducer, this=token.text, expression=literal) 5855 5856 return self._identifier_expression(token) 5857 5858 def _parse_session_parameter(self) -> exp.SessionParameter: 5859 kind = None 5860 this = self._parse_id_var() or self._parse_primary() 5861 5862 if this and self._match(TokenType.DOT): 5863 kind = this.name 5864 this = self._parse_var() or self._parse_primary() 5865 5866 return self.expression(exp.SessionParameter, this=this, kind=kind) 5867 5868 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5869 return self._parse_id_var() 5870 5871 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5872 index = self._index 5873 5874 if self._match(TokenType.L_PAREN): 5875 expressions = t.cast( 5876 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5877 ) 5878 5879 if not self._match(TokenType.R_PAREN): 5880 self._retreat(index) 5881 else: 5882 expressions = [self._parse_lambda_arg()] 5883 5884 if self._match_set(self.LAMBDAS): 5885 return self.LAMBDAS[self._prev.token_type](self, expressions) 5886 5887 self._retreat(index) 5888 5889 this: t.Optional[exp.Expression] 5890 5891 if self._match(TokenType.DISTINCT): 5892 this = self.expression( 5893 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5894 ) 5895 else: 5896 this = self._parse_select_or_expression(alias=alias) 5897 5898 return self._parse_limit( 5899 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5900 ) 5901 5902 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5903 index = self._index 5904 if not self._match(TokenType.L_PAREN): 5905 return this 5906 5907 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5908 # expr can be of both types 5909 if self._match_set(self.SELECT_START_TOKENS): 5910 self._retreat(index) 5911 return this 5912 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5913 self._match_r_paren() 5914 return self.expression(exp.Schema, this=this, expressions=args) 5915 5916 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5917 return self._parse_column_def(self._parse_field(any_token=True)) 5918 5919 def _parse_column_def( 5920 self, this: t.Optional[exp.Expression], computed_column: bool = True 5921 ) -> t.Optional[exp.Expression]: 5922 # column defs are not really columns, they're identifiers 5923 if isinstance(this, exp.Column): 5924 this = this.this 5925 5926 if not computed_column: 5927 self._match(TokenType.ALIAS) 5928 5929 kind = self._parse_types(schema=True) 5930 5931 if self._match_text_seq("FOR", "ORDINALITY"): 5932 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5933 5934 constraints: t.List[exp.Expression] = [] 5935 5936 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5937 ("ALIAS", "MATERIALIZED") 5938 ): 5939 persisted = self._prev.text.upper() == "MATERIALIZED" 5940 constraint_kind = exp.ComputedColumnConstraint( 5941 this=self._parse_assignment(), 5942 persisted=persisted or self._match_text_seq("PERSISTED"), 5943 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5944 ) 5945 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5946 elif ( 5947 kind 5948 and self._match(TokenType.ALIAS, advance=False) 5949 and ( 5950 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5951 or (self._next and self._next.token_type == TokenType.L_PAREN) 5952 ) 5953 ): 5954 self._advance() 5955 constraints.append( 5956 self.expression( 5957 exp.ColumnConstraint, 5958 kind=exp.ComputedColumnConstraint( 5959 this=self._parse_disjunction(), 5960 persisted=self._match_texts(("STORED", "VIRTUAL")) 5961 and self._prev.text.upper() == "STORED", 5962 ), 5963 ) 5964 ) 5965 5966 while True: 5967 constraint = self._parse_column_constraint() 5968 if not constraint: 5969 break 5970 constraints.append(constraint) 5971 5972 if not kind and not constraints: 5973 return this 5974 5975 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5976 5977 def _parse_auto_increment( 5978 self, 5979 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5980 start = None 5981 increment = None 5982 order = None 5983 5984 if self._match(TokenType.L_PAREN, advance=False): 5985 args = self._parse_wrapped_csv(self._parse_bitwise) 5986 start = seq_get(args, 0) 5987 increment = seq_get(args, 1) 5988 elif self._match_text_seq("START"): 5989 start = self._parse_bitwise() 5990 self._match_text_seq("INCREMENT") 5991 increment = self._parse_bitwise() 5992 if self._match_text_seq("ORDER"): 5993 order = True 5994 elif self._match_text_seq("NOORDER"): 5995 order = False 5996 5997 if start and increment: 5998 return exp.GeneratedAsIdentityColumnConstraint( 5999 start=start, increment=increment, this=False, order=order 6000 ) 6001 6002 return exp.AutoIncrementColumnConstraint() 6003 6004 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6005 if not self._match_text_seq("REFRESH"): 6006 self._retreat(self._index - 1) 6007 return None 6008 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6009 6010 def _parse_compress(self) -> exp.CompressColumnConstraint: 6011 if 
self._match(TokenType.L_PAREN, advance=False): 6012 return self.expression( 6013 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6014 ) 6015 6016 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6017 6018 def _parse_generated_as_identity( 6019 self, 6020 ) -> ( 6021 exp.GeneratedAsIdentityColumnConstraint 6022 | exp.ComputedColumnConstraint 6023 | exp.GeneratedAsRowColumnConstraint 6024 ): 6025 if self._match_text_seq("BY", "DEFAULT"): 6026 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6027 this = self.expression( 6028 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6029 ) 6030 else: 6031 self._match_text_seq("ALWAYS") 6032 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6033 6034 self._match(TokenType.ALIAS) 6035 6036 if self._match_text_seq("ROW"): 6037 start = self._match_text_seq("START") 6038 if not start: 6039 self._match(TokenType.END) 6040 hidden = self._match_text_seq("HIDDEN") 6041 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6042 6043 identity = self._match_text_seq("IDENTITY") 6044 6045 if self._match(TokenType.L_PAREN): 6046 if self._match(TokenType.START_WITH): 6047 this.set("start", self._parse_bitwise()) 6048 if self._match_text_seq("INCREMENT", "BY"): 6049 this.set("increment", self._parse_bitwise()) 6050 if self._match_text_seq("MINVALUE"): 6051 this.set("minvalue", self._parse_bitwise()) 6052 if self._match_text_seq("MAXVALUE"): 6053 this.set("maxvalue", self._parse_bitwise()) 6054 6055 if self._match_text_seq("CYCLE"): 6056 this.set("cycle", True) 6057 elif self._match_text_seq("NO", "CYCLE"): 6058 this.set("cycle", False) 6059 6060 if not identity: 6061 this.set("expression", self._parse_range()) 6062 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6063 args = self._parse_csv(self._parse_bitwise) 6064 this.set("start", seq_get(args, 0)) 6065 this.set("increment", seq_get(args, 1)) 6066 6067 self._match_r_paren() 6068 6069 return this 6070 6071 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6072 self._match_text_seq("LENGTH") 6073 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6074 6075 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6076 if self._match_text_seq("NULL"): 6077 return self.expression(exp.NotNullColumnConstraint) 6078 if self._match_text_seq("CASESPECIFIC"): 6079 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6080 if self._match_text_seq("FOR", "REPLICATION"): 6081 return self.expression(exp.NotForReplicationColumnConstraint) 6082 6083 # Unconsume the `NOT` token 6084 self._retreat(self._index - 1) 6085 return None 6086 6087 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6088 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6089 6090 procedure_option_follows = ( 6091 self._match(TokenType.WITH, advance=False) 6092 and self._next 6093 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6094 ) 6095 6096 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6097 return self.expression( 6098 exp.ColumnConstraint, 6099 this=this, 6100 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6101 ) 6102 6103 return this 6104 6105 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6106 if not self._match(TokenType.CONSTRAINT): 6107 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6108 6109 return self.expression( 6110 exp.Constraint, 6111 this=self._parse_id_var(), 6112 expressions=self._parse_unnamed_constraints(), 6113 ) 6114 6115 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6116 constraints = [] 6117 while True: 6118 constraint = self._parse_unnamed_constraint() or self._parse_function() 6119 if not constraint: 6120 break 6121 constraints.append(constraint) 6122 6123 return constraints 6124 6125 def _parse_unnamed_constraint( 6126 self, constraints: t.Optional[t.Collection[str]] = None 6127 ) -> t.Optional[exp.Expression]: 6128 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6129 constraints or self.CONSTRAINT_PARSERS 6130 ): 6131 return None 6132 6133 constraint = self._prev.text.upper() 6134 if constraint not in self.CONSTRAINT_PARSERS: 6135 self.raise_error(f"No parser found for schema constraint {constraint}.") 6136 6137 return self.CONSTRAINT_PARSERS[constraint](self) 6138 6139 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6140 return self._parse_id_var(any_token=False) 6141 6142 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6143 self._match_text_seq("KEY") 6144 return self.expression( 6145 exp.UniqueColumnConstraint, 6146 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6147 this=self._parse_schema(self._parse_unique_key()), 6148 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6149 on_conflict=self._parse_on_conflict(), 6150 options=self._parse_key_constraint_options(), 6151 ) 6152 6153 def _parse_key_constraint_options(self) -> t.List[str]: 6154 options = [] 6155 while True: 6156 if not self._curr: 6157 break 6158 6159 if self._match(TokenType.ON): 6160 action = None 6161 on = self._advance_any() and self._prev.text 6162 6163 if self._match_text_seq("NO", "ACTION"): 6164 action = "NO ACTION" 6165 elif self._match_text_seq("CASCADE"): 6166 action = "CASCADE" 6167 elif self._match_text_seq("RESTRICT"): 6168 action = "RESTRICT" 6169 elif self._match_pair(TokenType.SET, TokenType.NULL): 6170 action = "SET NULL" 6171 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6172 action = "SET DEFAULT" 6173 else: 6174 self.raise_error("Invalid key constraint") 6175 6176 options.append(f"ON {on} {action}") 6177 else: 6178 var = self._parse_var_from_options( 6179 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6180 ) 6181 if not var: 6182 break 6183 options.append(var.name) 6184 6185 return options 6186 6187 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6188 if match and not self._match(TokenType.REFERENCES): 6189 return None 6190 6191 expressions = None 6192 this = self._parse_table(schema=True) 6193 options = self._parse_key_constraint_options() 6194 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6195 6196 def _parse_foreign_key(self) -> exp.ForeignKey: 6197 expressions = ( 6198 self._parse_wrapped_id_vars() 6199 if not self._match(TokenType.REFERENCES, advance=False) 6200 else None 6201 ) 6202 reference = self._parse_references() 6203 on_options = {} 6204 6205 while self._match(TokenType.ON): 6206 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6207 self.raise_error("Expected DELETE or UPDATE") 6208 6209 kind = self._prev.text.lower() 6210 6211 if self._match_text_seq("NO", "ACTION"): 6212 action = "NO ACTION" 6213 elif self._match(TokenType.SET): 6214 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6215 action = "SET " + self._prev.text.upper() 6216 else: 6217 self._advance() 6218 action = self._prev.text.upper() 6219 6220 on_options[kind] = action 6221 6222 return self.expression( 6223 exp.ForeignKey, 6224 expressions=expressions, 6225 reference=reference, 6226 options=self._parse_key_constraint_options(), 6227 **on_options, # type: ignore 6228 ) 6229 6230 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6231 return self._parse_ordered() or self._parse_field() 6232 6233 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6234 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6235 self._retreat(self._index - 1) 6236 return None 6237 6238 id_vars = self._parse_wrapped_id_vars() 6239 return self.expression( 6240 exp.PeriodForSystemTimeConstraint, 6241 this=seq_get(id_vars, 0), 6242 expression=seq_get(id_vars, 1), 6243 ) 6244 6245 def _parse_primary_key( 6246 self, wrapped_optional: bool = False, in_props: bool = False 6247 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6248 desc = ( 6249 self._match_set((TokenType.ASC, TokenType.DESC)) 6250 and self._prev.token_type == TokenType.DESC 6251 ) 6252 6253 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6254 return self.expression( 6255 exp.PrimaryKeyColumnConstraint, 6256 desc=desc, 6257 options=self._parse_key_constraint_options(), 6258 ) 6259 6260 expressions = self._parse_wrapped_csv( 6261 self._parse_primary_key_part, optional=wrapped_optional 6262 ) 6263 options = self._parse_key_constraint_options() 6264 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6265 6266 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6267 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6268 6269 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6270 """ 6271 Parses a datetime literal in ODBC format. We parse the literal into the corresponding 6272 expression type; for example, `{d'yyyy-mm-dd'}` is parsed as a `Date`, exactly as 6273 `DATE('yyyy-mm-dd')` would be.
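The other ODBC escape prefixes work the same way: `{t'hh:mm:ss'}` and
`{ts'yyyy-mm-dd hh:mm:ss'}` are looked up in `ODBC_DATETIME_LITERALS` below and
produce the corresponding time and timestamp expressions.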
6274 6275 Reference: 6276 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6277 """ 6278 self._match(TokenType.VAR) 6279 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6280 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6281 if not self._match(TokenType.R_BRACE): 6282 self.raise_error("Expected }") 6283 return expression 6284 6285 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6286 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6287 return this 6288 6289 bracket_kind = self._prev.token_type 6290 if ( 6291 bracket_kind == TokenType.L_BRACE 6292 and self._curr 6293 and self._curr.token_type == TokenType.VAR 6294 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6295 ): 6296 return self._parse_odbc_datetime_literal() 6297 6298 expressions = self._parse_csv( 6299 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6300 ) 6301 6302 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6303 self.raise_error("Expected ]") 6304 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6305 self.raise_error("Expected }") 6306 6307 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6308 if bracket_kind == TokenType.L_BRACE: 6309 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6310 elif not this: 6311 this = build_array_constructor( 6312 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6313 ) 6314 else: 6315 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6316 if constructor_type: 6317 return build_array_constructor( 6318 constructor_type, 6319 args=expressions, 6320 bracket_kind=bracket_kind, 6321 dialect=self.dialect, 6322 ) 6323 6324 expressions = apply_index_offset( 6325 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6326 ) 6327 this = self.expression( 6328 exp.Bracket, 6329 this=this, 6330 expressions=expressions, 6331 comments=this.pop_comments(), 6332 ) 6333 6334 self._add_comments(this) 6335 return self._parse_bracket(this) 6336 6337 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6338 if self._match(TokenType.COLON): 6339 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6340 return this 6341 6342 def _parse_case(self) -> t.Optional[exp.Expression]: 6343 ifs = [] 6344 default = None 6345 6346 comments = self._prev_comments 6347 expression = self._parse_assignment() 6348 6349 while self._match(TokenType.WHEN): 6350 this = self._parse_assignment() 6351 self._match(TokenType.THEN) 6352 then = self._parse_assignment() 6353 ifs.append(self.expression(exp.If, this=this, true=then)) 6354 6355 if self._match(TokenType.ELSE): 6356 default = self._parse_assignment() 6357 6358 if not self._match(TokenType.END): 6359 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6360 default = exp.column("interval") 6361 else: 6362 self.raise_error("Expected END after CASE", self._prev) 6363 6364 return self.expression( 6365 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6366 ) 6367 6368 def _parse_if(self) -> t.Optional[exp.Expression]: 6369 if self._match(TokenType.L_PAREN): 6370 args = self._parse_csv( 6371 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6372 ) 6373 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6374 self._match_r_paren() 6375 else: 6376 index = self._index - 1 6377 6378 if self.NO_PAREN_IF_COMMANDS and index == 0: 6379 return self._parse_as_command(self._prev) 6380 6381 condition = self._parse_assignment() 6382 6383 if not condition: 6384 self._retreat(index) 6385 return None 6386 6387 self._match(TokenType.THEN) 6388 true = self._parse_assignment() 6389 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6390 self._match(TokenType.END) 6391 this = self.expression(exp.If, this=condition, true=true, false=false) 6392 6393 return this 6394 6395 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6396 if not self._match_text_seq("VALUE", "FOR"): 6397 self._retreat(self._index - 1) 6398 return None 6399 6400 return self.expression( 6401 exp.NextValueFor, 6402 this=self._parse_column(), 6403 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6404 ) 6405 6406 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6407 this = self._parse_function() or self._parse_var_or_string(upper=True) 6408 6409 if self._match(TokenType.FROM): 6410 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6411 6412 if not self._match(TokenType.COMMA): 6413 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6414 6415 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6416 6417 def _parse_gap_fill(self) -> exp.GapFill: 6418 self._match(TokenType.TABLE) 6419 this = self._parse_table() 6420 6421 self._match(TokenType.COMMA) 6422 args = [this, *self._parse_csv(self._parse_lambda)] 6423 6424 gap_fill = exp.GapFill.from_arg_list(args) 6425 return self.validate_expression(gap_fill, args) 6426 6427 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6428 this = self._parse_assignment() 6429 6430 if not self._match(TokenType.ALIAS): 6431 if self._match(TokenType.COMMA): 6432 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6433 6434 self.raise_error("Expected AS after CAST") 6435 6436 fmt = None 6437 to = self._parse_types() 6438 6439 default = self._match(TokenType.DEFAULT) 6440 if default: 6441 default = self._parse_bitwise() 6442 self._match_text_seq("ON", "CONVERSION", "ERROR") 6443 6444 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6445 fmt_string = self._parse_string() 6446 fmt = self._parse_at_time_zone(fmt_string) 6447 6448 if not to: 6449 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6450 if to.this in exp.DataType.TEMPORAL_TYPES: 6451 this = self.expression( 6452 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6453 this=this, 6454 format=exp.Literal.string( 6455 format_time( 6456 fmt_string.this if fmt_string else "", 6457 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6458 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6459 ) 6460 ), 6461 safe=safe, 6462 ) 6463 6464 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6465 this.set("zone", fmt.args["zone"]) 6466 return this 6467 elif not to: 6468 self.raise_error("Expected TYPE after CAST") 6469 elif isinstance(to, exp.Identifier): 6470 to = exp.DataType.build(to.name, udt=True) 6471 elif to.this == exp.DataType.Type.CHAR: 6472 if self._match(TokenType.CHARACTER_SET): 6473 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6474 6475 return self.expression( 6476 exp.Cast if strict else exp.TryCast, 6477 
this=this, 6478 to=to, 6479 format=fmt, 6480 safe=safe, 6481 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6482 default=default, 6483 ) 6484 6485 def _parse_string_agg(self) -> exp.GroupConcat: 6486 if self._match(TokenType.DISTINCT): 6487 args: t.List[t.Optional[exp.Expression]] = [ 6488 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6489 ] 6490 if self._match(TokenType.COMMA): 6491 args.extend(self._parse_csv(self._parse_assignment)) 6492 else: 6493 args = self._parse_csv(self._parse_assignment) # type: ignore 6494 6495 if self._match_text_seq("ON", "OVERFLOW"): 6496 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6497 if self._match_text_seq("ERROR"): 6498 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6499 else: 6500 self._match_text_seq("TRUNCATE") 6501 on_overflow = self.expression( 6502 exp.OverflowTruncateBehavior, 6503 this=self._parse_string(), 6504 with_count=( 6505 self._match_text_seq("WITH", "COUNT") 6506 or not self._match_text_seq("WITHOUT", "COUNT") 6507 ), 6508 ) 6509 else: 6510 on_overflow = None 6511 6512 index = self._index 6513 if not self._match(TokenType.R_PAREN) and args: 6514 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6515 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6516 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6517 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6518 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6519 6520 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6521 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6522 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
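# A rough sketch of the two shapes (default dialect assumed):
#
#   STRING_AGG(x, ',' ORDER BY y)                -- Postgres/BigQuery style
#   LISTAGG(x, ',') WITHIN GROUP (ORDER BY y)    -- Trino/Oracle style
#
# Both are parsed into exp.GroupConcat with the ORDER BY folded into `this`, so the
# same AST can be generated back out in either form.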
6523 if not self._match_text_seq("WITHIN", "GROUP"): 6524 self._retreat(index) 6525 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6526 6527 # The corresponding match_r_paren will be called in parse_function (caller) 6528 self._match_l_paren() 6529 6530 return self.expression( 6531 exp.GroupConcat, 6532 this=self._parse_order(this=seq_get(args, 0)), 6533 separator=seq_get(args, 1), 6534 on_overflow=on_overflow, 6535 ) 6536 6537 def _parse_convert( 6538 self, strict: bool, safe: t.Optional[bool] = None 6539 ) -> t.Optional[exp.Expression]: 6540 this = self._parse_bitwise() 6541 6542 if self._match(TokenType.USING): 6543 to: t.Optional[exp.Expression] = self.expression( 6544 exp.CharacterSet, this=self._parse_var() 6545 ) 6546 elif self._match(TokenType.COMMA): 6547 to = self._parse_types() 6548 else: 6549 to = None 6550 6551 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6552 6553 def _parse_xml_table(self) -> exp.XMLTable: 6554 namespaces = None 6555 passing = None 6556 columns = None 6557 6558 if self._match_text_seq("XMLNAMESPACES", "("): 6559 namespaces = self._parse_xml_namespace() 6560 self._match_text_seq(")", ",") 6561 6562 this = self._parse_string() 6563 6564 if self._match_text_seq("PASSING"): 6565 # The BY VALUE keywords are optional and are provided for semantic clarity 6566 self._match_text_seq("BY", "VALUE") 6567 passing = self._parse_csv(self._parse_column) 6568 6569 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6570 6571 if self._match_text_seq("COLUMNS"): 6572 columns = self._parse_csv(self._parse_field_def) 6573 6574 return self.expression( 6575 exp.XMLTable, 6576 this=this, 6577 namespaces=namespaces, 6578 passing=passing, 6579 columns=columns, 6580 by_ref=by_ref, 6581 ) 6582 6583 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6584 namespaces = [] 6585 6586 while True: 6587 if self._match(TokenType.DEFAULT): 6588 uri = self._parse_string() 6589 else: 6590 uri = self._parse_alias(self._parse_string()) 6591 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6592 if not self._match(TokenType.COMMA): 6593 break 6594 6595 return namespaces 6596 6597 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6598 """ 6599 There are generally two variants of the DECODE function: 6600 6601 - DECODE(bin, charset) 6602 - DECODE(expression, search, result [, search, result] ... [, default]) 6603 6604 The second variant will always be parsed into a CASE expression. Note that NULL 6605 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6606 instead of relying on pattern matching. 
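For example, `DECODE(x, 1, 'one', 'other')` is parsed into
`CASE WHEN x = 1 THEN 'one' ELSE 'other' END`, and a NULL search value as in
`DECODE(x, NULL, 'none')` is checked with `x IS NULL`.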
6607 """ 6608 args = self._parse_csv(self._parse_assignment) 6609 6610 if len(args) < 3: 6611 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6612 6613 expression, *expressions = args 6614 if not expression: 6615 return None 6616 6617 ifs = [] 6618 for search, result in zip(expressions[::2], expressions[1::2]): 6619 if not search or not result: 6620 return None 6621 6622 if isinstance(search, exp.Literal): 6623 ifs.append( 6624 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6625 ) 6626 elif isinstance(search, exp.Null): 6627 ifs.append( 6628 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6629 ) 6630 else: 6631 cond = exp.or_( 6632 exp.EQ(this=expression.copy(), expression=search), 6633 exp.and_( 6634 exp.Is(this=expression.copy(), expression=exp.Null()), 6635 exp.Is(this=search.copy(), expression=exp.Null()), 6636 copy=False, 6637 ), 6638 copy=False, 6639 ) 6640 ifs.append(exp.If(this=cond, true=result)) 6641 6642 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6643 6644 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6645 self._match_text_seq("KEY") 6646 key = self._parse_column() 6647 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6648 self._match_text_seq("VALUE") 6649 value = self._parse_bitwise() 6650 6651 if not key and not value: 6652 return None 6653 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6654 6655 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6656 if not this or not self._match_text_seq("FORMAT", "JSON"): 6657 return this 6658 6659 return self.expression(exp.FormatJson, this=this) 6660 6661 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6662 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6663 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6664 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6665 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6666 else: 6667 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6668 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6669 6670 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6671 6672 if not empty and not error and not null: 6673 return None 6674 6675 return self.expression( 6676 exp.OnCondition, 6677 empty=empty, 6678 error=error, 6679 null=null, 6680 ) 6681 6682 def _parse_on_handling( 6683 self, on: str, *values: str 6684 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6685 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6686 for value in values: 6687 if self._match_text_seq(value, "ON", on): 6688 return f"{value} ON {on}" 6689 6690 index = self._index 6691 if self._match(TokenType.DEFAULT): 6692 default_value = self._parse_bitwise() 6693 if self._match_text_seq("ON", on): 6694 return default_value 6695 6696 self._retreat(index) 6697 6698 return None 6699 6700 @t.overload 6701 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6702 6703 @t.overload 6704 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6705 6706 def _parse_json_object(self, agg=False): 6707 star = self._parse_star() 6708 expressions = ( 6709 [star] 6710 if star 6711 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6712 ) 6713 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6714 6715 unique_keys = None 6716 if self._match_text_seq("WITH", "UNIQUE"): 6717 unique_keys = True 6718 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6719 unique_keys = False 6720 6721 self._match_text_seq("KEYS") 6722 6723 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6724 self._parse_type() 6725 ) 6726 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6727 6728 return self.expression( 6729 exp.JSONObjectAgg if agg else exp.JSONObject, 6730 expressions=expressions, 6731 null_handling=null_handling, 6732 unique_keys=unique_keys, 6733 return_type=return_type, 6734 encoding=encoding, 6735 ) 6736 6737 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6738 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6739 if not self._match_text_seq("NESTED"): 6740 this = self._parse_id_var() 6741 kind = self._parse_types(allow_identifiers=False) 6742 nested = None 6743 else: 6744 this = None 6745 kind = None 6746 nested = True 6747 6748 path = self._match_text_seq("PATH") and self._parse_string() 6749 nested_schema = nested and self._parse_json_schema() 6750 6751 return self.expression( 6752 exp.JSONColumnDef, 6753 this=this, 6754 kind=kind, 6755 path=path, 6756 nested_schema=nested_schema, 6757 ) 6758 6759 def _parse_json_schema(self) -> exp.JSONSchema: 6760 self._match_text_seq("COLUMNS") 6761 return self.expression( 6762 exp.JSONSchema, 6763 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6764 ) 6765 6766 def _parse_json_table(self) -> exp.JSONTable: 6767 this = self._parse_format_json(self._parse_bitwise()) 6768 path = self._match(TokenType.COMMA) and self._parse_string() 6769 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6770 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6771 schema = self._parse_json_schema() 6772 6773 return exp.JSONTable( 6774 this=this, 6775 schema=schema, 6776 path=path, 6777 error_handling=error_handling, 6778 empty_handling=empty_handling, 6779 ) 6780 6781 def _parse_match_against(self) -> exp.MatchAgainst: 6782 expressions = self._parse_csv(self._parse_column) 6783 6784 self._match_text_seq(")", "AGAINST", "(") 6785 6786 this = self._parse_string() 6787 6788 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6789 modifier = "IN NATURAL LANGUAGE MODE" 6790 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6791 modifier = f"{modifier} WITH QUERY EXPANSION" 6792 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6793 modifier = "IN BOOLEAN MODE" 6794 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6795 modifier = "WITH QUERY EXPANSION" 6796 else: 6797 modifier = None 6798 6799 return self.expression( 6800 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6801 ) 6802 6803 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6804 def _parse_open_json(self) -> exp.OpenJSON: 6805 this = self._parse_bitwise() 6806 path = self._match(TokenType.COMMA) and self._parse_string() 6807 6808 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6809 this = self._parse_field(any_token=True) 6810 kind = self._parse_types() 6811 path = 
self._parse_string() 6812 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6813 6814 return self.expression( 6815 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6816 ) 6817 6818 expressions = None 6819 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6820 self._match_l_paren() 6821 expressions = self._parse_csv(_parse_open_json_column_def) 6822 6823 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6824 6825 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6826 args = self._parse_csv(self._parse_bitwise) 6827 6828 if self._match(TokenType.IN): 6829 return self.expression( 6830 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6831 ) 6832 6833 if haystack_first: 6834 haystack = seq_get(args, 0) 6835 needle = seq_get(args, 1) 6836 else: 6837 haystack = seq_get(args, 1) 6838 needle = seq_get(args, 0) 6839 6840 return self.expression( 6841 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6842 ) 6843 6844 def _parse_predict(self) -> exp.Predict: 6845 self._match_text_seq("MODEL") 6846 this = self._parse_table() 6847 6848 self._match(TokenType.COMMA) 6849 self._match_text_seq("TABLE") 6850 6851 return self.expression( 6852 exp.Predict, 6853 this=this, 6854 expression=self._parse_table(), 6855 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6856 ) 6857 6858 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6859 args = self._parse_csv(self._parse_table) 6860 return exp.JoinHint(this=func_name.upper(), expressions=args) 6861 6862 def _parse_substring(self) -> exp.Substring: 6863 # Postgres supports the form: substring(string [from int] [for int]) 6864 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6865 6866 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6867 6868 if self._match(TokenType.FROM): 6869 args.append(self._parse_bitwise()) 6870 if self._match(TokenType.FOR): 6871 if len(args) == 1: 6872 args.append(exp.Literal.number(1)) 6873 args.append(self._parse_bitwise()) 6874 6875 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6876 6877 def _parse_trim(self) -> exp.Trim: 6878 # https://www.w3resource.com/sql/character-functions/trim.php 6879 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6880 6881 position = None 6882 collation = None 6883 expression = None 6884 6885 if self._match_texts(self.TRIM_TYPES): 6886 position = self._prev.text.upper() 6887 6888 this = self._parse_bitwise() 6889 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6890 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6891 expression = self._parse_bitwise() 6892 6893 if invert_order: 6894 this, expression = expression, this 6895 6896 if self._match(TokenType.COLLATE): 6897 collation = self._parse_bitwise() 6898 6899 return self.expression( 6900 exp.Trim, this=this, position=position, expression=expression, collation=collation 6901 ) 6902 6903 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6904 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6905 6906 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6907 return self._parse_window(self._parse_id_var(), alias=True) 6908 6909 def _parse_respect_or_ignore_nulls( 6910 self, this: t.Optional[exp.Expression] 6911 ) -> t.Optional[exp.Expression]: 6912 if self._match_text_seq("IGNORE", "NULLS"): 
6913 return self.expression(exp.IgnoreNulls, this=this) 6914 if self._match_text_seq("RESPECT", "NULLS"): 6915 return self.expression(exp.RespectNulls, this=this) 6916 return this 6917 6918 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6919 if self._match(TokenType.HAVING): 6920 self._match_texts(("MAX", "MIN")) 6921 max = self._prev.text.upper() != "MIN" 6922 return self.expression( 6923 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6924 ) 6925 6926 return this 6927 6928 def _parse_window( 6929 self, this: t.Optional[exp.Expression], alias: bool = False 6930 ) -> t.Optional[exp.Expression]: 6931 func = this 6932 comments = func.comments if isinstance(func, exp.Expression) else None 6933 6934 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6935 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6936 if self._match_text_seq("WITHIN", "GROUP"): 6937 order = self._parse_wrapped(self._parse_order) 6938 this = self.expression(exp.WithinGroup, this=this, expression=order) 6939 6940 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6941 self._match(TokenType.WHERE) 6942 this = self.expression( 6943 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6944 ) 6945 self._match_r_paren() 6946 6947 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6948 # Some dialects choose to implement and some do not. 6949 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6950 6951 # There is some code above in _parse_lambda that handles 6952 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6953 6954 # The below changes handle 6955 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6956 6957 # Oracle allows both formats 6958 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6959 # and Snowflake chose to do the same for familiarity 6960 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6961 if isinstance(this, exp.AggFunc): 6962 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6963 6964 if ignore_respect and ignore_respect is not this: 6965 ignore_respect.replace(ignore_respect.this) 6966 this = self.expression(ignore_respect.__class__, this=this) 6967 6968 this = self._parse_respect_or_ignore_nulls(this) 6969 6970 # bigquery select from window x AS (partition by ...) 
6971 if alias: 6972 over = None 6973 self._match(TokenType.ALIAS) 6974 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6975 return this 6976 else: 6977 over = self._prev.text.upper() 6978 6979 if comments and isinstance(func, exp.Expression): 6980 func.pop_comments() 6981 6982 if not self._match(TokenType.L_PAREN): 6983 return self.expression( 6984 exp.Window, 6985 comments=comments, 6986 this=this, 6987 alias=self._parse_id_var(False), 6988 over=over, 6989 ) 6990 6991 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6992 6993 first = self._match(TokenType.FIRST) 6994 if self._match_text_seq("LAST"): 6995 first = False 6996 6997 partition, order = self._parse_partition_and_order() 6998 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6999 7000 if kind: 7001 self._match(TokenType.BETWEEN) 7002 start = self._parse_window_spec() 7003 self._match(TokenType.AND) 7004 end = self._parse_window_spec() 7005 exclude = ( 7006 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7007 if self._match_text_seq("EXCLUDE") 7008 else None 7009 ) 7010 7011 spec = self.expression( 7012 exp.WindowSpec, 7013 kind=kind, 7014 start=start["value"], 7015 start_side=start["side"], 7016 end=end["value"], 7017 end_side=end["side"], 7018 exclude=exclude, 7019 ) 7020 else: 7021 spec = None 7022 7023 self._match_r_paren() 7024 7025 window = self.expression( 7026 exp.Window, 7027 comments=comments, 7028 this=this, 7029 partition_by=partition, 7030 order=order, 7031 spec=spec, 7032 alias=window_alias, 7033 over=over, 7034 first=first, 7035 ) 7036 7037 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 7038 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7039 return self._parse_window(window, alias=alias) 7040 7041 return window 7042 7043 def _parse_partition_and_order( 7044 self, 7045 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7046 return self._parse_partition_by(), self._parse_order() 7047 7048 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7049 self._match(TokenType.BETWEEN) 7050 7051 return { 7052 "value": ( 7053 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7054 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7055 or self._parse_bitwise() 7056 ), 7057 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7058 } 7059 7060 def _parse_alias( 7061 self, this: t.Optional[exp.Expression], explicit: bool = False 7062 ) -> t.Optional[exp.Expression]: 7063 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7064 # so this section tries to parse the clause version and if it fails, it treats the token 7065 # as an identifier (alias) 7066 if self._can_parse_limit_or_offset(): 7067 return this 7068 7069 any_token = self._match(TokenType.ALIAS) 7070 comments = self._prev_comments or [] 7071 7072 if explicit and not any_token: 7073 return this 7074 7075 if self._match(TokenType.L_PAREN): 7076 aliases = self.expression( 7077 exp.Aliases, 7078 comments=comments, 7079 this=this, 7080 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7081 ) 7082 self._match_r_paren(aliases) 7083 return aliases 7084 7085 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7086 self.STRING_ALIASES and self._parse_string_as_identifier() 7087 ) 7088 7089 if alias: 7090 comments.extend(alias.pop_comments()) 7091 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 7092 column = this.this 7093 7094 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7095 if not this.comments and column and column.comments: 7096 this.comments = column.pop_comments() 7097 7098 return this 7099 7100 def _parse_id_var( 7101 self, 7102 any_token: bool = True, 7103 tokens: t.Optional[t.Collection[TokenType]] = None, 7104 ) -> t.Optional[exp.Expression]: 7105 expression = self._parse_identifier() 7106 if not expression and ( 7107 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7108 ): 7109 quoted = self._prev.token_type == TokenType.STRING 7110 expression = self._identifier_expression(quoted=quoted) 7111 7112 return expression 7113 7114 def _parse_string(self) -> t.Optional[exp.Expression]: 7115 if self._match_set(self.STRING_PARSERS): 7116 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7117 return self._parse_placeholder() 7118 7119 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7120 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7121 if output: 7122 output.update_positions(self._prev) 7123 return output 7124 7125 def _parse_number(self) -> t.Optional[exp.Expression]: 7126 if self._match_set(self.NUMERIC_PARSERS): 7127 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7128 return self._parse_placeholder() 7129 7130 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7131 if self._match(TokenType.IDENTIFIER): 7132 return self._identifier_expression(quoted=True) 7133 return self._parse_placeholder() 7134 7135 def _parse_var( 7136 self, 7137 any_token: bool = False, 7138 tokens: t.Optional[t.Collection[TokenType]] = None, 7139 upper: bool = False, 7140 ) -> t.Optional[exp.Expression]: 7141 if ( 7142 (any_token and self._advance_any()) 7143 or self._match(TokenType.VAR) 7144 or (self._match_set(tokens) if tokens else False) 7145 ): 7146 return self.expression( 7147 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7148 ) 7149 return self._parse_placeholder() 7150 7151 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7152 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7153 self._advance() 7154 return self._prev 7155 return None 7156 7157 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7158 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7159 7160 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7161 return self._parse_primary() or self._parse_var(any_token=True) 7162 7163 def _parse_null(self) -> t.Optional[exp.Expression]: 7164 if self._match_set(self.NULL_TOKENS): 7165 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7166 return self._parse_placeholder() 7167 7168 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7169 if self._match(TokenType.TRUE): 7170 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7171 if self._match(TokenType.FALSE): 7172 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7173 return self._parse_placeholder() 7174 7175 def _parse_star(self) -> t.Optional[exp.Expression]: 7176 if self._match(TokenType.STAR): 7177 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7178 return self._parse_placeholder() 7179 7180 def _parse_parameter(self) -> exp.Parameter: 7181 this = self._parse_identifier() or self._parse_primary_or_var() 7182 return 
self.expression(exp.Parameter, this=this) 7183 7184 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7185 if self._match_set(self.PLACEHOLDER_PARSERS): 7186 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7187 if placeholder: 7188 return placeholder 7189 self._advance(-1) 7190 return None 7191 7192 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7193 if not self._match_texts(keywords): 7194 return None 7195 if self._match(TokenType.L_PAREN, advance=False): 7196 return self._parse_wrapped_csv(self._parse_expression) 7197 7198 expression = self._parse_expression() 7199 return [expression] if expression else None 7200 7201 def _parse_csv( 7202 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7203 ) -> t.List[exp.Expression]: 7204 parse_result = parse_method() 7205 items = [parse_result] if parse_result is not None else [] 7206 7207 while self._match(sep): 7208 self._add_comments(parse_result) 7209 parse_result = parse_method() 7210 if parse_result is not None: 7211 items.append(parse_result) 7212 7213 return items 7214 7215 def _parse_tokens( 7216 self, parse_method: t.Callable, expressions: t.Dict 7217 ) -> t.Optional[exp.Expression]: 7218 this = parse_method() 7219 7220 while self._match_set(expressions): 7221 this = self.expression( 7222 expressions[self._prev.token_type], 7223 this=this, 7224 comments=self._prev_comments, 7225 expression=parse_method(), 7226 ) 7227 7228 return this 7229 7230 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7231 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7232 7233 def _parse_wrapped_csv( 7234 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7235 ) -> t.List[exp.Expression]: 7236 return self._parse_wrapped( 7237 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7238 ) 7239 7240 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7241 wrapped = self._match(TokenType.L_PAREN) 7242 if not wrapped and not optional: 7243 self.raise_error("Expecting (") 7244 parse_result = parse_method() 7245 if wrapped: 7246 self._match_r_paren() 7247 return parse_result 7248 7249 def _parse_expressions(self) -> t.List[exp.Expression]: 7250 return self._parse_csv(self._parse_expression) 7251 7252 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7253 return self._parse_select() or self._parse_set_operations( 7254 self._parse_alias(self._parse_assignment(), explicit=True) 7255 if alias 7256 else self._parse_assignment() 7257 ) 7258 7259 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7260 return self._parse_query_modifiers( 7261 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7262 ) 7263 7264 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7265 this = None 7266 if self._match_texts(self.TRANSACTION_KIND): 7267 this = self._prev.text 7268 7269 self._match_texts(("TRANSACTION", "WORK")) 7270 7271 modes = [] 7272 while True: 7273 mode = [] 7274 while self._match(TokenType.VAR): 7275 mode.append(self._prev.text) 7276 7277 if mode: 7278 modes.append(" ".join(mode)) 7279 if not self._match(TokenType.COMMA): 7280 break 7281 7282 return self.expression(exp.Transaction, this=this, modes=modes) 7283 7284 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7285 chain = None 7286 savepoint = None 7287 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 7288 7289 self._match_texts(("TRANSACTION", "WORK")) 7290 7291 if self._match_text_seq("TO"): 7292 self._match_text_seq("SAVEPOINT") 7293 savepoint = self._parse_id_var() 7294 7295 if self._match(TokenType.AND): 7296 chain = not self._match_text_seq("NO") 7297 self._match_text_seq("CHAIN") 7298 7299 if is_rollback: 7300 return self.expression(exp.Rollback, savepoint=savepoint) 7301 7302 return self.expression(exp.Commit, chain=chain) 7303 7304 def _parse_refresh(self) -> exp.Refresh: 7305 self._match(TokenType.TABLE) 7306 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7307 7308 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7309 if not self._prev.text.upper() == "ADD": 7310 return None 7311 7312 start = self._index 7313 self._match(TokenType.COLUMN) 7314 7315 exists_column = self._parse_exists(not_=True) 7316 expression = self._parse_field_def() 7317 7318 if not isinstance(expression, exp.ColumnDef): 7319 self._retreat(start) 7320 return None 7321 7322 expression.set("exists", exists_column) 7323 7324 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7325 if self._match_texts(("FIRST", "AFTER")): 7326 position = self._prev.text 7327 column_position = self.expression( 7328 exp.ColumnPosition, this=self._parse_column(), position=position 7329 ) 7330 expression.set("position", column_position) 7331 7332 return expression 7333 7334 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7335 drop = self._match(TokenType.DROP) and self._parse_drop() 7336 if drop and not isinstance(drop, exp.Command): 7337 drop.set("kind", drop.args.get("kind", "COLUMN")) 7338 return drop 7339 7340 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7341 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7342 return self.expression( 7343 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7344 ) 7345 7346 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7347 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7348 self._match_text_seq("ADD") 7349 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7350 return self.expression( 7351 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7352 ) 7353 7354 column_def = self._parse_add_column() 7355 if isinstance(column_def, exp.ColumnDef): 7356 return column_def 7357 7358 exists = self._parse_exists(not_=True) 7359 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7360 return self.expression( 7361 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7362 ) 7363 7364 return None 7365 7366 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7367 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7368 or self._match_text_seq("COLUMNS") 7369 ): 7370 schema = self._parse_schema() 7371 7372 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7373 7374 return self._parse_csv(_parse_add_alteration) 7375 7376 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7377 if self._match_texts(self.ALTER_ALTER_PARSERS): 7378 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7379 7380 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7381 # keyword after ALTER we default to parsing this statement 7382 self._match(TokenType.COLUMN) 7383 column = self._parse_field(any_token=True) 
7384 7385 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7386 return self.expression(exp.AlterColumn, this=column, drop=True) 7387 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7388 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7389 if self._match(TokenType.COMMENT): 7390 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7391 if self._match_text_seq("DROP", "NOT", "NULL"): 7392 return self.expression( 7393 exp.AlterColumn, 7394 this=column, 7395 drop=True, 7396 allow_null=True, 7397 ) 7398 if self._match_text_seq("SET", "NOT", "NULL"): 7399 return self.expression( 7400 exp.AlterColumn, 7401 this=column, 7402 allow_null=False, 7403 ) 7404 7405 if self._match_text_seq("SET", "VISIBLE"): 7406 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7407 if self._match_text_seq("SET", "INVISIBLE"): 7408 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7409 7410 self._match_text_seq("SET", "DATA") 7411 self._match_text_seq("TYPE") 7412 return self.expression( 7413 exp.AlterColumn, 7414 this=column, 7415 dtype=self._parse_types(), 7416 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7417 using=self._match(TokenType.USING) and self._parse_assignment(), 7418 ) 7419 7420 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7421 if self._match_texts(("ALL", "EVEN", "AUTO")): 7422 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7423 7424 self._match_text_seq("KEY", "DISTKEY") 7425 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7426 7427 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7428 if compound: 7429 self._match_text_seq("SORTKEY") 7430 7431 if self._match(TokenType.L_PAREN, advance=False): 7432 return self.expression( 7433 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7434 ) 7435 7436 self._match_texts(("AUTO", "NONE")) 7437 return self.expression( 7438 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7439 ) 7440 7441 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7442 index = self._index - 1 7443 7444 partition_exists = self._parse_exists() 7445 if self._match(TokenType.PARTITION, advance=False): 7446 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7447 7448 self._retreat(index) 7449 return self._parse_csv(self._parse_drop_column) 7450 7451 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7452 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7453 exists = self._parse_exists() 7454 old_column = self._parse_column() 7455 to = self._match_text_seq("TO") 7456 new_column = self._parse_column() 7457 7458 if old_column is None or to is None or new_column is None: 7459 return None 7460 7461 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7462 7463 self._match_text_seq("TO") 7464 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7465 7466 def _parse_alter_table_set(self) -> exp.AlterSet: 7467 alter_set = self.expression(exp.AlterSet) 7468 7469 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7470 "TABLE", "PROPERTIES" 7471 ): 7472 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7473 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7474 
alter_set.set("expressions", [self._parse_assignment()]) 7475 elif self._match_texts(("LOGGED", "UNLOGGED")): 7476 alter_set.set("option", exp.var(self._prev.text.upper())) 7477 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7478 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7479 elif self._match_text_seq("LOCATION"): 7480 alter_set.set("location", self._parse_field()) 7481 elif self._match_text_seq("ACCESS", "METHOD"): 7482 alter_set.set("access_method", self._parse_field()) 7483 elif self._match_text_seq("TABLESPACE"): 7484 alter_set.set("tablespace", self._parse_field()) 7485 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7486 alter_set.set("file_format", [self._parse_field()]) 7487 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7488 alter_set.set("file_format", self._parse_wrapped_options()) 7489 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7490 alter_set.set("copy_options", self._parse_wrapped_options()) 7491 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7492 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7493 else: 7494 if self._match_text_seq("SERDE"): 7495 alter_set.set("serde", self._parse_field()) 7496 7497 properties = self._parse_wrapped(self._parse_properties, optional=True) 7498 alter_set.set("expressions", [properties]) 7499 7500 return alter_set 7501 7502 def _parse_alter(self) -> exp.Alter | exp.Command: 7503 start = self._prev 7504 7505 alter_token = self._match_set(self.ALTERABLES) and self._prev 7506 if not alter_token: 7507 return self._parse_as_command(start) 7508 7509 exists = self._parse_exists() 7510 only = self._match_text_seq("ONLY") 7511 this = self._parse_table(schema=True) 7512 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7513 7514 if self._next: 7515 self._advance() 7516 7517 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7518 if parser: 7519 actions = ensure_list(parser(self)) 7520 not_valid = self._match_text_seq("NOT", "VALID") 7521 options = self._parse_csv(self._parse_property) 7522 7523 if not self._curr and actions: 7524 return self.expression( 7525 exp.Alter, 7526 this=this, 7527 kind=alter_token.text.upper(), 7528 exists=exists, 7529 actions=actions, 7530 only=only, 7531 options=options, 7532 cluster=cluster, 7533 not_valid=not_valid, 7534 ) 7535 7536 return self._parse_as_command(start) 7537 7538 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7539 start = self._prev 7540 # https://duckdb.org/docs/sql/statements/analyze 7541 if not self._curr: 7542 return self.expression(exp.Analyze) 7543 7544 options = [] 7545 while self._match_texts(self.ANALYZE_STYLES): 7546 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7547 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7548 else: 7549 options.append(self._prev.text.upper()) 7550 7551 this: t.Optional[exp.Expression] = None 7552 inner_expression: t.Optional[exp.Expression] = None 7553 7554 kind = self._curr and self._curr.text.upper() 7555 7556 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7557 this = self._parse_table_parts() 7558 elif self._match_text_seq("TABLES"): 7559 if self._match_set((TokenType.FROM, TokenType.IN)): 7560 kind = f"{kind} {self._prev.text.upper()}" 7561 this = self._parse_table(schema=True, is_db_reference=True) 7562 elif self._match_text_seq("DATABASE"): 7563 this = self._parse_table(schema=True, is_db_reference=True) 7564 elif 
self._match_text_seq("CLUSTER"): 7565 this = self._parse_table() 7566 # Try matching inner expr keywords before fallback to parse table. 7567 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7568 kind = None 7569 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7570 else: 7571 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7572 kind = None 7573 this = self._parse_table_parts() 7574 7575 partition = self._try_parse(self._parse_partition) 7576 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7577 return self._parse_as_command(start) 7578 7579 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7580 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7581 "WITH", "ASYNC", "MODE" 7582 ): 7583 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7584 else: 7585 mode = None 7586 7587 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7588 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7589 7590 properties = self._parse_properties() 7591 return self.expression( 7592 exp.Analyze, 7593 kind=kind, 7594 this=this, 7595 mode=mode, 7596 partition=partition, 7597 properties=properties, 7598 expression=inner_expression, 7599 options=options, 7600 ) 7601 7602 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7603 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7604 this = None 7605 kind = self._prev.text.upper() 7606 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7607 expressions = [] 7608 7609 if not self._match_text_seq("STATISTICS"): 7610 self.raise_error("Expecting token STATISTICS") 7611 7612 if self._match_text_seq("NOSCAN"): 7613 this = "NOSCAN" 7614 elif self._match(TokenType.FOR): 7615 if self._match_text_seq("ALL", "COLUMNS"): 7616 this = "FOR ALL COLUMNS" 7617 if self._match_texts("COLUMNS"): 7618 this = "FOR COLUMNS" 7619 expressions = self._parse_csv(self._parse_column_reference) 7620 elif self._match_text_seq("SAMPLE"): 7621 sample = self._parse_number() 7622 expressions = [ 7623 self.expression( 7624 exp.AnalyzeSample, 7625 sample=sample, 7626 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7627 ) 7628 ] 7629 7630 return self.expression( 7631 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7632 ) 7633 7634 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7635 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7636 kind = None 7637 this = None 7638 expression: t.Optional[exp.Expression] = None 7639 if self._match_text_seq("REF", "UPDATE"): 7640 kind = "REF" 7641 this = "UPDATE" 7642 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7643 this = "UPDATE SET DANGLING TO NULL" 7644 elif self._match_text_seq("STRUCTURE"): 7645 kind = "STRUCTURE" 7646 if self._match_text_seq("CASCADE", "FAST"): 7647 this = "CASCADE FAST" 7648 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7649 ("ONLINE", "OFFLINE") 7650 ): 7651 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7652 expression = self._parse_into() 7653 7654 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7655 7656 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7657 this = self._prev.text.upper() 7658 if self._match_text_seq("COLUMNS"): 7659 return self.expression(exp.AnalyzeColumns, this=f"{this} 
{self._prev.text.upper()}") 7660 return None 7661 7662 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7663 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7664 if self._match_text_seq("STATISTICS"): 7665 return self.expression(exp.AnalyzeDelete, kind=kind) 7666 return None 7667 7668 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7669 if self._match_text_seq("CHAINED", "ROWS"): 7670 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7671 return None 7672 7673 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7674 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7675 this = self._prev.text.upper() 7676 expression: t.Optional[exp.Expression] = None 7677 expressions = [] 7678 update_options = None 7679 7680 if self._match_text_seq("HISTOGRAM", "ON"): 7681 expressions = self._parse_csv(self._parse_column_reference) 7682 with_expressions = [] 7683 while self._match(TokenType.WITH): 7684 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7685 if self._match_texts(("SYNC", "ASYNC")): 7686 if self._match_text_seq("MODE", advance=False): 7687 with_expressions.append(f"{self._prev.text.upper()} MODE") 7688 self._advance() 7689 else: 7690 buckets = self._parse_number() 7691 if self._match_text_seq("BUCKETS"): 7692 with_expressions.append(f"{buckets} BUCKETS") 7693 if with_expressions: 7694 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7695 7696 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7697 TokenType.UPDATE, advance=False 7698 ): 7699 update_options = self._prev.text.upper() 7700 self._advance() 7701 elif self._match_text_seq("USING", "DATA"): 7702 expression = self.expression(exp.UsingData, this=self._parse_string()) 7703 7704 return self.expression( 7705 exp.AnalyzeHistogram, 7706 this=this, 7707 expressions=expressions, 7708 expression=expression, 7709 update_options=update_options, 7710 ) 7711 7712 def _parse_merge(self) -> exp.Merge: 7713 self._match(TokenType.INTO) 7714 target = self._parse_table() 7715 7716 if target and self._match(TokenType.ALIAS, advance=False): 7717 target.set("alias", self._parse_table_alias()) 7718 7719 self._match(TokenType.USING) 7720 using = self._parse_table() 7721 7722 self._match(TokenType.ON) 7723 on = self._parse_assignment() 7724 7725 return self.expression( 7726 exp.Merge, 7727 this=target, 7728 using=using, 7729 on=on, 7730 whens=self._parse_when_matched(), 7731 returning=self._parse_returning(), 7732 ) 7733 7734 def _parse_when_matched(self) -> exp.Whens: 7735 whens = [] 7736 7737 while self._match(TokenType.WHEN): 7738 matched = not self._match(TokenType.NOT) 7739 self._match_text_seq("MATCHED") 7740 source = ( 7741 False 7742 if self._match_text_seq("BY", "TARGET") 7743 else self._match_text_seq("BY", "SOURCE") 7744 ) 7745 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7746 7747 self._match(TokenType.THEN) 7748 7749 if self._match(TokenType.INSERT): 7750 this = self._parse_star() 7751 if this: 7752 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7753 else: 7754 then = self.expression( 7755 exp.Insert, 7756 this=exp.var("ROW") 7757 if self._match_text_seq("ROW") 7758 else self._parse_value(values=False), 7759 expression=self._match_text_seq("VALUES") and self._parse_value(), 7760 ) 7761 elif self._match(TokenType.UPDATE): 7762 expressions = self._parse_star() 7763 if expressions: 7764 then = 
self.expression(exp.Update, expressions=expressions) 7765 else: 7766 then = self.expression( 7767 exp.Update, 7768 expressions=self._match(TokenType.SET) 7769 and self._parse_csv(self._parse_equality), 7770 ) 7771 elif self._match(TokenType.DELETE): 7772 then = self.expression(exp.Var, this=self._prev.text) 7773 else: 7774 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7775 7776 whens.append( 7777 self.expression( 7778 exp.When, 7779 matched=matched, 7780 source=source, 7781 condition=condition, 7782 then=then, 7783 ) 7784 ) 7785 return self.expression(exp.Whens, expressions=whens) 7786 7787 def _parse_show(self) -> t.Optional[exp.Expression]: 7788 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7789 if parser: 7790 return parser(self) 7791 return self._parse_as_command(self._prev) 7792 7793 def _parse_set_item_assignment( 7794 self, kind: t.Optional[str] = None 7795 ) -> t.Optional[exp.Expression]: 7796 index = self._index 7797 7798 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7799 return self._parse_set_transaction(global_=kind == "GLOBAL") 7800 7801 left = self._parse_primary() or self._parse_column() 7802 assignment_delimiter = self._match_texts(("=", "TO")) 7803 7804 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7805 self._retreat(index) 7806 return None 7807 7808 right = self._parse_statement() or self._parse_id_var() 7809 if isinstance(right, (exp.Column, exp.Identifier)): 7810 right = exp.var(right.name) 7811 7812 this = self.expression(exp.EQ, this=left, expression=right) 7813 return self.expression(exp.SetItem, this=this, kind=kind) 7814 7815 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7816 self._match_text_seq("TRANSACTION") 7817 characteristics = self._parse_csv( 7818 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7819 ) 7820 return self.expression( 7821 exp.SetItem, 7822 expressions=characteristics, 7823 kind="TRANSACTION", 7824 **{"global": global_}, # type: ignore 7825 ) 7826 7827 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7828 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7829 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7830 7831 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7832 index = self._index 7833 set_ = self.expression( 7834 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7835 ) 7836 7837 if self._curr: 7838 self._retreat(index) 7839 return self._parse_as_command(self._prev) 7840 7841 return set_ 7842 7843 def _parse_var_from_options( 7844 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7845 ) -> t.Optional[exp.Var]: 7846 start = self._curr 7847 if not start: 7848 return None 7849 7850 option = start.text.upper() 7851 continuations = options.get(option) 7852 7853 index = self._index 7854 self._advance() 7855 for keywords in continuations or []: 7856 if isinstance(keywords, str): 7857 keywords = (keywords,) 7858 7859 if self._match_text_seq(*keywords): 7860 option = f"{option} {' '.join(keywords)}" 7861 break 7862 else: 7863 if continuations or continuations is None: 7864 if raise_unmatched: 7865 self.raise_error(f"Unknown option {option}") 7866 7867 self._retreat(index) 7868 return None 7869 7870 return exp.var(option) 7871 7872 def _parse_as_command(self, start: Token) -> exp.Command: 7873 while self._curr: 7874 self._advance() 7875 text = self._find_sql(start, self._prev) 
7876 size = len(start.text) 7877 self._warn_unsupported() 7878 return exp.Command(this=text[:size], expression=text[size:]) 7879 7880 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7881 settings = [] 7882 7883 self._match_l_paren() 7884 kind = self._parse_id_var() 7885 7886 if self._match(TokenType.L_PAREN): 7887 while True: 7888 key = self._parse_id_var() 7889 value = self._parse_primary() 7890 if not key and value is None: 7891 break 7892 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7893 self._match(TokenType.R_PAREN) 7894 7895 self._match_r_paren() 7896 7897 return self.expression( 7898 exp.DictProperty, 7899 this=this, 7900 kind=kind.this if kind else None, 7901 settings=settings, 7902 ) 7903 7904 def _parse_dict_range(self, this: str) -> exp.DictRange: 7905 self._match_l_paren() 7906 has_min = self._match_text_seq("MIN") 7907 if has_min: 7908 min = self._parse_var() or self._parse_primary() 7909 self._match_text_seq("MAX") 7910 max = self._parse_var() or self._parse_primary() 7911 else: 7912 max = self._parse_var() or self._parse_primary() 7913 min = exp.Literal.number(0) 7914 self._match_r_paren() 7915 return self.expression(exp.DictRange, this=this, min=min, max=max) 7916 7917 def _parse_comprehension( 7918 self, this: t.Optional[exp.Expression] 7919 ) -> t.Optional[exp.Comprehension]: 7920 index = self._index 7921 expression = self._parse_column() 7922 if not self._match(TokenType.IN): 7923 self._retreat(index - 1) 7924 return None 7925 iterator = self._parse_column() 7926 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7927 return self.expression( 7928 exp.Comprehension, 7929 this=this, 7930 expression=expression, 7931 iterator=iterator, 7932 condition=condition, 7933 ) 7934 7935 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7936 if self._match(TokenType.HEREDOC_STRING): 7937 return self.expression(exp.Heredoc, this=self._prev.text) 7938 7939 if not self._match_text_seq("$"): 7940 return None 7941 7942 tags = ["$"] 7943 tag_text = None 7944 7945 if self._is_connected(): 7946 self._advance() 7947 tags.append(self._prev.text.upper()) 7948 else: 7949 self.raise_error("No closing $ found") 7950 7951 if tags[-1] != "$": 7952 if self._is_connected() and self._match_text_seq("$"): 7953 tag_text = tags[-1] 7954 tags.append("$") 7955 else: 7956 self.raise_error("No closing $ found") 7957 7958 heredoc_start = self._curr 7959 7960 while self._curr: 7961 if self._match_text_seq(*tags, advance=False): 7962 this = self._find_sql(heredoc_start, self._prev) 7963 self._advance(len(tags)) 7964 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7965 7966 self._advance() 7967 7968 self.raise_error(f"No closing {''.join(tags)} found") 7969 return None 7970 7971 def _find_parser( 7972 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7973 ) -> t.Optional[t.Callable]: 7974 if not self._curr: 7975 return None 7976 7977 index = self._index 7978 this = [] 7979 while True: 7980 # The current token might be multiple words 7981 curr = self._curr.text.upper() 7982 key = curr.split(" ") 7983 this.append(curr) 7984 7985 self._advance() 7986 result, trie = in_trie(trie, key) 7987 if result == TrieResult.FAILED: 7988 break 7989 7990 if result == TrieResult.EXISTS: 7991 subparser = parsers[" ".join(this)] 7992 return subparser 7993 7994 self._retreat(index) 7995 return None 7996 7997 def _match(self, token_type, advance=True, expression=None): 7998 if not self._curr: 7999 return None 8000 8001 if self._curr.token_type 
== token_type: 8002 if advance: 8003 self._advance() 8004 self._add_comments(expression) 8005 return True 8006 8007 return None 8008 8009 def _match_set(self, types, advance=True): 8010 if not self._curr: 8011 return None 8012 8013 if self._curr.token_type in types: 8014 if advance: 8015 self._advance() 8016 return True 8017 8018 return None 8019 8020 def _match_pair(self, token_type_a, token_type_b, advance=True): 8021 if not self._curr or not self._next: 8022 return None 8023 8024 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8025 if advance: 8026 self._advance(2) 8027 return True 8028 8029 return None 8030 8031 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8032 if not self._match(TokenType.L_PAREN, expression=expression): 8033 self.raise_error("Expecting (") 8034 8035 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8036 if not self._match(TokenType.R_PAREN, expression=expression): 8037 self.raise_error("Expecting )") 8038 8039 def _match_texts(self, texts, advance=True): 8040 if ( 8041 self._curr 8042 and self._curr.token_type != TokenType.STRING 8043 and self._curr.text.upper() in texts 8044 ): 8045 if advance: 8046 self._advance() 8047 return True 8048 return None 8049 8050 def _match_text_seq(self, *texts, advance=True): 8051 index = self._index 8052 for text in texts: 8053 if ( 8054 self._curr 8055 and self._curr.token_type != TokenType.STRING 8056 and self._curr.text.upper() == text 8057 ): 8058 self._advance() 8059 else: 8060 self._retreat(index) 8061 return None 8062 8063 if not advance: 8064 self._retreat(index) 8065 8066 return True 8067 8068 def _replace_lambda( 8069 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8070 ) -> t.Optional[exp.Expression]: 8071 if not node: 8072 return node 8073 8074 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8075 8076 for column in node.find_all(exp.Column): 8077 typ = lambda_types.get(column.parts[0].name) 8078 if typ is not None: 8079 dot_or_id = column.to_dot() if column.table else column.this 8080 8081 if typ: 8082 dot_or_id = self.expression( 8083 exp.Cast, 8084 this=dot_or_id, 8085 to=typ, 8086 ) 8087 8088 parent = column.parent 8089 8090 while isinstance(parent, exp.Dot): 8091 if not isinstance(parent.parent, exp.Dot): 8092 parent.replace(dot_or_id) 8093 break 8094 parent = parent.parent 8095 else: 8096 if column is node: 8097 node = dot_or_id 8098 else: 8099 column.replace(dot_or_id) 8100 return node 8101 8102 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8103 start = self._prev 8104 8105 # Not to be confused with TRUNCATE(number, decimals) function call 8106 if self._match(TokenType.L_PAREN): 8107 self._retreat(self._index - 2) 8108 return self._parse_function() 8109 8110 # Clickhouse supports TRUNCATE DATABASE as well 8111 is_database = self._match(TokenType.DATABASE) 8112 8113 self._match(TokenType.TABLE) 8114 8115 exists = self._parse_exists(not_=False) 8116 8117 expressions = self._parse_csv( 8118 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8119 ) 8120 8121 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8122 8123 if self._match_text_seq("RESTART", "IDENTITY"): 8124 identity = "RESTART" 8125 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8126 identity = "CONTINUE" 8127 else: 8128 identity = None 8129 8130 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8131 
option = self._prev.text 8132 else: 8133 option = None 8134 8135 partition = self._parse_partition() 8136 8137 # Fallback case 8138 if self._curr: 8139 return self._parse_as_command(start) 8140 8141 return self.expression( 8142 exp.TruncateTable, 8143 expressions=expressions, 8144 is_database=is_database, 8145 exists=exists, 8146 cluster=cluster, 8147 identity=identity, 8148 option=option, 8149 partition=partition, 8150 ) 8151 8152 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8153 this = self._parse_ordered(self._parse_opclass) 8154 8155 if not self._match(TokenType.WITH): 8156 return this 8157 8158 op = self._parse_var(any_token=True) 8159 8160 return self.expression(exp.WithOperator, this=this, op=op) 8161 8162 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8163 self._match(TokenType.EQ) 8164 self._match(TokenType.L_PAREN) 8165 8166 opts: t.List[t.Optional[exp.Expression]] = [] 8167 option: exp.Expression | None 8168 while self._curr and not self._match(TokenType.R_PAREN): 8169 if self._match_text_seq("FORMAT_NAME", "="): 8170 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8171 option = self._parse_format_name() 8172 else: 8173 option = self._parse_property() 8174 8175 if option is None: 8176 self.raise_error("Unable to parse option") 8177 break 8178 8179 opts.append(option) 8180 8181 return opts 8182 8183 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8184 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8185 8186 options = [] 8187 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8188 option = self._parse_var(any_token=True) 8189 prev = self._prev.text.upper() 8190 8191 # Different dialects might separate options and values by white space, "=" and "AS" 8192 self._match(TokenType.EQ) 8193 self._match(TokenType.ALIAS) 8194 8195 param = self.expression(exp.CopyParameter, this=option) 8196 8197 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8198 TokenType.L_PAREN, advance=False 8199 ): 8200 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8201 param.set("expressions", self._parse_wrapped_options()) 8202 elif prev == "FILE_FORMAT": 8203 # T-SQL's external file format case 8204 param.set("expression", self._parse_field()) 8205 else: 8206 param.set("expression", self._parse_unquoted_field()) 8207 8208 options.append(param) 8209 self._match(sep) 8210 8211 return options 8212 8213 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8214 expr = self.expression(exp.Credentials) 8215 8216 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8217 expr.set("storage", self._parse_field()) 8218 if self._match_text_seq("CREDENTIALS"): 8219 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8220 creds = ( 8221 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8222 ) 8223 expr.set("credentials", creds) 8224 if self._match_text_seq("ENCRYPTION"): 8225 expr.set("encryption", self._parse_wrapped_options()) 8226 if self._match_text_seq("IAM_ROLE"): 8227 expr.set("iam_role", self._parse_field()) 8228 if self._match_text_seq("REGION"): 8229 expr.set("region", self._parse_field()) 8230 8231 return expr 8232 8233 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8234 return self._parse_field() 8235 8236 def _parse_copy(self) -> exp.Copy | exp.Command: 8237 start = self._prev 8238 8239 self._match(TokenType.INTO) 8240 8241 this = ( 8242 self._parse_select(nested=True, 
parse_subquery_alias=False) 8243 if self._match(TokenType.L_PAREN, advance=False) 8244 else self._parse_table(schema=True) 8245 ) 8246 8247 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8248 8249 files = self._parse_csv(self._parse_file_location) 8250 credentials = self._parse_credentials() 8251 8252 self._match_text_seq("WITH") 8253 8254 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8255 8256 # Fallback case 8257 if self._curr: 8258 return self._parse_as_command(start) 8259 8260 return self.expression( 8261 exp.Copy, 8262 this=this, 8263 kind=kind, 8264 credentials=credentials, 8265 files=files, 8266 params=params, 8267 ) 8268 8269 def _parse_normalize(self) -> exp.Normalize: 8270 return self.expression( 8271 exp.Normalize, 8272 this=self._parse_bitwise(), 8273 form=self._match(TokenType.COMMA) and self._parse_var(), 8274 ) 8275 8276 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8277 args = self._parse_csv(lambda: self._parse_lambda()) 8278 8279 this = seq_get(args, 0) 8280 decimals = seq_get(args, 1) 8281 8282 return expr_type( 8283 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8284 ) 8285 8286 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8287 star_token = self._prev 8288 8289 if self._match_text_seq("COLUMNS", "(", advance=False): 8290 this = self._parse_function() 8291 if isinstance(this, exp.Columns): 8292 this.set("unpack", True) 8293 return this 8294 8295 return self.expression( 8296 exp.Star, 8297 **{ # type: ignore 8298 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8299 "replace": self._parse_star_op("REPLACE"), 8300 "rename": self._parse_star_op("RENAME"), 8301 }, 8302 ).update_positions(star_token) 8303 8304 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8305 privilege_parts = [] 8306 8307 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8308 # (end of privilege list) or L_PAREN (start of column list) are met 8309 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8310 privilege_parts.append(self._curr.text.upper()) 8311 self._advance() 8312 8313 this = exp.var(" ".join(privilege_parts)) 8314 expressions = ( 8315 self._parse_wrapped_csv(self._parse_column) 8316 if self._match(TokenType.L_PAREN, advance=False) 8317 else None 8318 ) 8319 8320 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8321 8322 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8323 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8324 principal = self._parse_id_var() 8325 8326 if not principal: 8327 return None 8328 8329 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8330 8331 def _parse_grant(self) -> exp.Grant | exp.Command: 8332 start = self._prev 8333 8334 privileges = self._parse_csv(self._parse_grant_privilege) 8335 8336 self._match(TokenType.ON) 8337 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8338 8339 # Attempt to parse the securable e.g. 
MySQL allows names 8340 # such as "foo.*", "*.*" which are not easily parseable yet 8341 securable = self._try_parse(self._parse_table_parts) 8342 8343 if not securable or not self._match_text_seq("TO"): 8344 return self._parse_as_command(start) 8345 8346 principals = self._parse_csv(self._parse_grant_principal) 8347 8348 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8349 8350 if self._curr: 8351 return self._parse_as_command(start) 8352 8353 return self.expression( 8354 exp.Grant, 8355 privileges=privileges, 8356 kind=kind, 8357 securable=securable, 8358 principals=principals, 8359 grant_option=grant_option, 8360 ) 8361 8362 def _parse_overlay(self) -> exp.Overlay: 8363 return self.expression( 8364 exp.Overlay, 8365 **{ # type: ignore 8366 "this": self._parse_bitwise(), 8367 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8368 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8369 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8370 }, 8371 ) 8372 8373 def _parse_format_name(self) -> exp.Property: 8374 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8375 # for FILE_FORMAT = <format_name> 8376 return self.expression( 8377 exp.Property, 8378 this=exp.var("FORMAT_NAME"), 8379 value=self._parse_string() or self._parse_table_parts(), 8380 ) 8381 8382 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8383 args: t.List[exp.Expression] = [] 8384 8385 if self._match(TokenType.DISTINCT): 8386 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8387 self._match(TokenType.COMMA) 8388 8389 args.extend(self._parse_csv(self._parse_assignment)) 8390 8391 return self.expression( 8392 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8393 ) 8394 8395 def _identifier_expression( 8396 self, token: t.Optional[Token] = None, **kwargs: t.Any 8397 ) -> exp.Identifier: 8398 token = token or self._prev 8399 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8400 expression.update_positions(token) 8401 return expression 8402 8403 def _build_pipe_cte( 8404 self, 8405 query: exp.Query, 8406 expressions: t.List[exp.Expression], 8407 alias_cte: t.Optional[exp.TableAlias] = None, 8408 ) -> exp.Select: 8409 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8410 if alias_cte: 8411 new_cte = alias_cte 8412 else: 8413 self._pipe_cte_counter += 1 8414 new_cte = f"__tmp{self._pipe_cte_counter}" 8415 8416 with_ = query.args.get("with") 8417 ctes = with_.pop() if with_ else None 8418 8419 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8420 if ctes: 8421 new_select.set("with", ctes) 8422 8423 return new_select.with_(new_cte, as_=query, copy=False) 8424 8425 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8426 select = self._parse_select(consume_pipe=False) 8427 if not select: 8428 return query 8429 8430 return self._build_pipe_cte( 8431 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8432 ) 8433 8434 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8435 limit = self._parse_limit() 8436 offset = self._parse_offset() 8437 if limit: 8438 curr_limit = query.args.get("limit", limit) 8439 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8440 query.limit(limit, copy=False) 8441 if offset: 8442 curr_offset = query.args.get("offset") 8443 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
8444 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8445 8446 return query 8447 8448 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8449 this = self._parse_assignment() 8450 if self._match_text_seq("GROUP", "AND", advance=False): 8451 return this 8452 8453 this = self._parse_alias(this) 8454 8455 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8456 return self._parse_ordered(lambda: this) 8457 8458 return this 8459 8460 def _parse_pipe_syntax_aggregate_group_order_by( 8461 self, query: exp.Select, group_by_exists: bool = True 8462 ) -> exp.Select: 8463 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8464 aggregates_or_groups, orders = [], [] 8465 for element in expr: 8466 if isinstance(element, exp.Ordered): 8467 this = element.this 8468 if isinstance(this, exp.Alias): 8469 element.set("this", this.args["alias"]) 8470 orders.append(element) 8471 else: 8472 this = element 8473 aggregates_or_groups.append(this) 8474 8475 if group_by_exists: 8476 query.select(*aggregates_or_groups, copy=False).group_by( 8477 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8478 copy=False, 8479 ) 8480 else: 8481 query.select(*aggregates_or_groups, append=False, copy=False) 8482 8483 if orders: 8484 return query.order_by(*orders, append=False, copy=False) 8485 8486 return query 8487 8488 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8489 self._match_text_seq("AGGREGATE") 8490 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8491 8492 if self._match(TokenType.GROUP_BY) or ( 8493 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8494 ): 8495 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8496 8497 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8498 8499 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8500 first_setop = self.parse_set_operation(this=query) 8501 if not first_setop: 8502 return None 8503 8504 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8505 expr = self._parse_paren() 8506 return expr.assert_is(exp.Subquery).unnest() if expr else None 8507 8508 first_setop.this.pop() 8509 8510 setops = [ 8511 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8512 *self._parse_csv(_parse_and_unwrap_query), 8513 ] 8514 8515 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8516 with_ = query.args.get("with") 8517 ctes = with_.pop() if with_ else None 8518 8519 if isinstance(first_setop, exp.Union): 8520 query = query.union(*setops, copy=False, **first_setop.args) 8521 elif isinstance(first_setop, exp.Except): 8522 query = query.except_(*setops, copy=False, **first_setop.args) 8523 else: 8524 query = query.intersect(*setops, copy=False, **first_setop.args) 8525 8526 query.set("with", ctes) 8527 8528 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8529 8530 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8531 join = self._parse_join() 8532 if not join: 8533 return None 8534 8535 if isinstance(query, exp.Select): 8536 return query.join(join, copy=False) 8537 8538 return query 8539 8540 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8541 pivots = self._parse_pivots() 8542 if not pivots: 8543 return query 8544 8545 from_ = query.args.get("from") 8546 if from_: 8547 from_.this.set("pivots", pivots) 8548 8549 
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8550 8551 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8552 self._match_text_seq("EXTEND") 8553 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8554 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8555 8556 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8557 sample = self._parse_table_sample() 8558 8559 with_ = query.args.get("with") 8560 if with_: 8561 with_.expressions[-1].this.set("sample", sample) 8562 else: 8563 query.set("sample", sample) 8564 8565 return query 8566 8567 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8568 if isinstance(query, exp.Subquery): 8569 query = exp.select("*").from_(query, copy=False) 8570 8571 if not query.args.get("from"): 8572 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8573 8574 while self._match(TokenType.PIPE_GT): 8575 start = self._curr 8576 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8577 if not parser: 8578 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8579 # keywords, making it tricky to disambiguate them without lookahead. The approach 8580 # here is to try and parse a set operation and if that fails, then try to parse a 8581 # join operator. If that fails as well, then the operator is not supported. 8582 parsed_query = self._parse_pipe_syntax_set_operator(query) 8583 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8584 if not parsed_query: 8585 self._retreat(start) 8586 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8587 break 8588 query = parsed_query 8589 else: 8590 query = parser(self, query) 8591 8592 return query
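These internal _parse_* methods are not called directly in normal use; they are reached through the public entry points documented below. As a minimal sketch (the MERGE statement is an illustrative input, assuming the default dialect), a statement handled by _parse_merge and _parse_when_matched above surfaces as an exp.Merge tree:

import sqlglot
from sqlglot import exp

# parse_one tokenizes and parses in one step, dispatching to the
# internal _parse_* methods shown above.
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)
print(merge.sql())  # generates SQL back from the parsed tree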
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
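A minimal sketch of constructing a Parser directly with these settings; the usual workflow goes through sqlglot.parse or sqlglot.parse_one, so driving the Tokenizer and Parser by hand like this is illustrative rather than the recommended path:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
tokens = Tokenizer().tokenize(sql)

# WARN logs errors instead of raising; the other two settings tune the messages.
parser = Parser(error_level=ErrorLevel.WARN, error_message_context=50, max_errors=5)
expressions = parser.parse(tokens, sql)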
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
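Because parse returns one tree per statement, a multi-statement script comes back as a list. A sketch using the base Tokenizer and Parser:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql)
assert len(trees) == 2  # one syntax tree per parsed statement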
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
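For example, parse_into can target a narrower grammar rule than a full statement. The sketch below assumes exp.Condition has a registered entry in EXPRESSION_PARSERS, which is the case in current sqlglot releases:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "a = 1 AND b < 2"
tokens = Tokenizer().tokenize(sql)

# Parses the token list as a boolean condition rather than a full statement.
condition = Parser().parse_into(exp.Condition, tokens, sql)[0]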
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
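With ErrorLevel.WARN, errors accumulate on parser.errors and check_errors, which runs at the end of parsing, logs them instead of raising. A sketch, assuming the dangling operator below is in fact rejected by the default grammar:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1 +"  # assumed-malformed: dangling operator
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)
print(len(parser.errors))  # errors were recorded and logged, not raised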
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
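Under the default ErrorLevel.IMMEDIATE, raise_error fires on the first problem, and the resulting ParseError carries the structured fields built above (description, line, col, highlight, and the surrounding context). A sketch, again assuming the truncated input is rejected:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"  # assumed-malformed: missing table name
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], err["description"])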
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
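Dialect authors typically call expression from inside custom parsing methods; a standalone sketch of the same call (exp.column is a public helper for building column nodes):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
# Builds NOT x and validates it against exp.Not's declared arg_types.
node = parser.expression(exp.Not, this=exp.column("x"))
print(node.sql())  # NOT x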
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
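validate_expression is where missing mandatory arguments surface. A sketch, assuming exp.Like declares both 'this' and 'expression' as required (it does in current releases) and that raise_error works on a freshly reset parser with no tokens:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # defaults to ErrorLevel.IMMEDIATE
try:
    parser.validate_expression(exp.Like())  # mandatory args left unset
except ParseError as e:
    print(e.errors[0]["description"])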
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]

            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True

            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
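From the public API, the distinct handling above can be observed on the parsed tree. A sketch under the default dialect, where SET_OP_DISTINCT_BY_DEFAULT marks a bare UNION as distinct:

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
assert isinstance(union, exp.Union)
print(union.args.get("distinct"))  # True: bare UNION defaults to DISTINCT here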