sqlglot.parser
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6import itertools 7from collections import defaultdict 8 9from sqlglot import exp 10from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 11from sqlglot.helper import apply_index_offset, ensure_list, seq_get 12from sqlglot.time import format_time 13from sqlglot.tokens import Token, Tokenizer, TokenType 14from sqlglot.trie import TrieResult, in_trie, new_trie 15 16if t.TYPE_CHECKING: 17 from sqlglot._typing import E, Lit 18 from sqlglot.dialects.dialect import Dialect, DialectType 19 20 T = t.TypeVar("T") 21 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 22 23logger = logging.getLogger("sqlglot") 24 25OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 26 27# Used to detect alphabetical characters and +/- in timestamp literals 28TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]") 29 30 31def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 32 if len(args) == 1 and args[0].is_star: 33 return exp.StarMap(this=args[0]) 34 35 keys = [] 36 values = [] 37 for i in range(0, len(args), 2): 38 keys.append(args[i]) 39 values.append(args[i + 1]) 40 41 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 42 43 44def build_like(args: t.List) -> exp.Escape | exp.Like: 45 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 46 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 47 48 49def binary_range_parser( 50 expr_type: t.Type[exp.Expression], reverse_args: bool = False 51) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 52 def _parse_binary_range( 53 self: Parser, this: t.Optional[exp.Expression] 54 ) -> t.Optional[exp.Expression]: 55 expression = self._parse_bitwise() 56 if reverse_args: 57 this, expression = expression, this 58 return self._parse_escape(self.expression(expr_type, this=this, 
expression=expression)) 59 60 return _parse_binary_range 61 62 63def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 64 # Default argument order is base, expression 65 this = seq_get(args, 0) 66 expression = seq_get(args, 1) 67 68 if expression: 69 if not dialect.LOG_BASE_FIRST: 70 this, expression = expression, this 71 return exp.Log(this=this, expression=expression) 72 73 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 74 75 76def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 79 80 81def build_lower(args: t.List) -> exp.Lower | exp.Hex: 82 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 85 86 87def build_upper(args: t.List) -> exp.Upper | exp.Hex: 88 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 89 arg = seq_get(args, 0) 90 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 91 92 93def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 94 def _builder(args: t.List, dialect: Dialect) -> E: 95 expression = expr_type( 96 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 97 ) 98 if len(args) > 2 and expr_type is exp.JSONExtract: 99 expression.set("expressions", args[2:]) 100 101 return expression 102 103 return _builder 104 105 106def build_mod(args: t.List) -> exp.Mod: 107 this = seq_get(args, 0) 108 expression = seq_get(args, 1) 109 110 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 111 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 112 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 113 114 return exp.Mod(this=this, expression=expression) 115 116 117def build_pad(args: t.List, is_left: bool = True): 118 return exp.Pad( 119 this=seq_get(args, 0), 120 expression=seq_get(args, 1), 121 fill_pattern=seq_get(args, 2), 122 is_left=is_left, 123 ) 124 125 126def build_array_constructor( 127 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 128) -> exp.Expression: 129 array_exp = exp_class(expressions=args) 130 131 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 132 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 133 134 return array_exp 135 136 137def build_convert_timezone( 138 args: t.List, default_source_tz: t.Optional[str] = None 139) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 140 if len(args) == 2: 141 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 142 return exp.ConvertTimezone( 143 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 144 ) 145 146 return exp.ConvertTimezone.from_arg_list(args) 147 148 149def build_trim(args: t.List, is_left: bool = True): 150 return exp.Trim( 151 this=seq_get(args, 0), 152 expression=seq_get(args, 1), 153 position="LEADING" if is_left else "TRAILING", 154 ) 155 156 157def build_coalesce( 158 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 159) -> exp.Coalesce: 160 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 161 162 163def build_locate_strposition(args: t.List): 164 return exp.StrPosition( 165 this=seq_get(args, 1), 166 substr=seq_get(args, 0), 167 position=seq_get(args, 2), 168 ) 169 170 171class _Parser(type): 172 def __new__(cls, clsname, bases, attrs): 173 klass = super().__new__(cls, clsname, 
bases, attrs) 174 175 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 176 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 177 178 return klass 179 180 181class Parser(metaclass=_Parser): 182 """ 183 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 184 185 Args: 186 error_level: The desired error level. 187 Default: ErrorLevel.IMMEDIATE 188 error_message_context: The amount of context to capture from a query string when displaying 189 the error message (in number of characters). 190 Default: 100 191 max_errors: Maximum number of error messages to include in a raised ParseError. 192 This is only relevant if error_level is ErrorLevel.RAISE. 193 Default: 3 194 """ 195 196 FUNCTIONS: t.Dict[str, t.Callable] = { 197 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 198 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 199 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 200 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 204 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 205 ), 206 "CHAR": lambda args: exp.Chr(expressions=args), 207 "CHR": lambda args: exp.Chr(expressions=args), 208 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 209 "CONCAT": lambda args, dialect: exp.Concat( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 215 expressions=args, 216 safe=not dialect.STRICT_STRING_CONCAT, 217 coalesce=dialect.CONCAT_COALESCE, 218 ), 219 "CONVERT_TIMEZONE": build_convert_timezone, 220 "DATE_TO_DATE_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 
to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 225 start=seq_get(args, 0), 226 end=seq_get(args, 1), 227 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 228 ), 229 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 230 "HEX": build_hex, 231 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 232 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 233 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 234 "LIKE": build_like, 235 "LOG": build_logarithm, 236 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 237 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 238 "LOWER": build_lower, 239 "LPAD": lambda args: build_pad(args), 240 "LEFTPAD": lambda args: build_pad(args), 241 "LTRIM": lambda args: build_trim(args), 242 "MOD": build_mod, 243 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 244 "RPAD": lambda args: build_pad(args, is_left=False), 245 "RTRIM": lambda args: build_trim(args, is_left=False), 246 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 247 if len(args) != 2 248 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 249 "STRPOS": exp.StrPosition.from_arg_list, 250 "CHARINDEX": lambda args: build_locate_strposition(args), 251 "INSTR": exp.StrPosition.from_arg_list, 252 "LOCATE": lambda args: build_locate_strposition(args), 253 "TIME_TO_TIME_STR": lambda args: exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 "TO_HEX": build_hex, 258 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 259 this=exp.Cast( 260 this=seq_get(args, 0), 261 to=exp.DataType(this=exp.DataType.Type.TEXT), 262 ), 263 start=exp.Literal.number(1), 264 length=exp.Literal.number(10), 265 ), 266 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 267 "UPPER": build_upper, 268 "VAR_MAP": build_var_map, 269 } 270 271 NO_PAREN_FUNCTIONS = { 272 TokenType.CURRENT_DATE: exp.CurrentDate, 273 TokenType.CURRENT_DATETIME: exp.CurrentDate, 274 TokenType.CURRENT_TIME: exp.CurrentTime, 275 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 276 TokenType.CURRENT_USER: exp.CurrentUser, 277 } 278 279 STRUCT_TYPE_TOKENS = { 280 TokenType.NESTED, 281 TokenType.OBJECT, 282 TokenType.STRUCT, 283 TokenType.UNION, 284 } 285 286 NESTED_TYPE_TOKENS = { 287 TokenType.ARRAY, 288 TokenType.LIST, 289 TokenType.LOWCARDINALITY, 290 TokenType.MAP, 291 TokenType.NULLABLE, 292 TokenType.RANGE, 293 *STRUCT_TYPE_TOKENS, 294 } 295 296 ENUM_TYPE_TOKENS = { 297 TokenType.DYNAMIC, 298 TokenType.ENUM, 299 TokenType.ENUM8, 300 TokenType.ENUM16, 301 } 302 303 AGGREGATE_TYPE_TOKENS = { 304 TokenType.AGGREGATEFUNCTION, 305 TokenType.SIMPLEAGGREGATEFUNCTION, 306 } 307 308 TYPE_TOKENS = { 309 TokenType.BIT, 310 TokenType.BOOLEAN, 311 TokenType.TINYINT, 312 TokenType.UTINYINT, 313 TokenType.SMALLINT, 314 TokenType.USMALLINT, 315 TokenType.INT, 316 TokenType.UINT, 317 TokenType.BIGINT, 318 TokenType.UBIGINT, 319 TokenType.INT128, 320 TokenType.UINT128, 321 TokenType.INT256, 322 TokenType.UINT256, 323 TokenType.MEDIUMINT, 324 TokenType.UMEDIUMINT, 325 TokenType.FIXEDSTRING, 326 TokenType.FLOAT, 327 TokenType.DOUBLE, 328 TokenType.UDOUBLE, 329 TokenType.CHAR, 330 TokenType.NCHAR, 331 TokenType.VARCHAR, 332 TokenType.NVARCHAR, 333 TokenType.BPCHAR, 334 TokenType.TEXT, 335 TokenType.MEDIUMTEXT, 336 TokenType.LONGTEXT, 337 TokenType.BLOB, 338 TokenType.MEDIUMBLOB, 339 TokenType.LONGBLOB, 340 TokenType.BINARY, 341 TokenType.VARBINARY, 342 TokenType.JSON, 343 TokenType.JSONB, 344 TokenType.INTERVAL, 345 TokenType.TINYBLOB, 346 TokenType.TINYTEXT, 347 TokenType.TIME, 348 TokenType.TIMETZ, 349 TokenType.TIMESTAMP, 350 TokenType.TIMESTAMP_S, 351 TokenType.TIMESTAMP_MS, 352 
TokenType.TIMESTAMP_NS, 353 TokenType.TIMESTAMPTZ, 354 TokenType.TIMESTAMPLTZ, 355 TokenType.TIMESTAMPNTZ, 356 TokenType.DATETIME, 357 TokenType.DATETIME2, 358 TokenType.DATETIME64, 359 TokenType.SMALLDATETIME, 360 TokenType.DATE, 361 TokenType.DATE32, 362 TokenType.INT4RANGE, 363 TokenType.INT4MULTIRANGE, 364 TokenType.INT8RANGE, 365 TokenType.INT8MULTIRANGE, 366 TokenType.NUMRANGE, 367 TokenType.NUMMULTIRANGE, 368 TokenType.TSRANGE, 369 TokenType.TSMULTIRANGE, 370 TokenType.TSTZRANGE, 371 TokenType.TSTZMULTIRANGE, 372 TokenType.DATERANGE, 373 TokenType.DATEMULTIRANGE, 374 TokenType.DECIMAL, 375 TokenType.DECIMAL32, 376 TokenType.DECIMAL64, 377 TokenType.DECIMAL128, 378 TokenType.DECIMAL256, 379 TokenType.UDECIMAL, 380 TokenType.BIGDECIMAL, 381 TokenType.UUID, 382 TokenType.GEOGRAPHY, 383 TokenType.GEOMETRY, 384 TokenType.POINT, 385 TokenType.RING, 386 TokenType.LINESTRING, 387 TokenType.MULTILINESTRING, 388 TokenType.POLYGON, 389 TokenType.MULTIPOLYGON, 390 TokenType.HLLSKETCH, 391 TokenType.HSTORE, 392 TokenType.PSEUDO_TYPE, 393 TokenType.SUPER, 394 TokenType.SERIAL, 395 TokenType.SMALLSERIAL, 396 TokenType.BIGSERIAL, 397 TokenType.XML, 398 TokenType.YEAR, 399 TokenType.USERDEFINED, 400 TokenType.MONEY, 401 TokenType.SMALLMONEY, 402 TokenType.ROWVERSION, 403 TokenType.IMAGE, 404 TokenType.VARIANT, 405 TokenType.VECTOR, 406 TokenType.VOID, 407 TokenType.OBJECT, 408 TokenType.OBJECT_IDENTIFIER, 409 TokenType.INET, 410 TokenType.IPADDRESS, 411 TokenType.IPPREFIX, 412 TokenType.IPV4, 413 TokenType.IPV6, 414 TokenType.UNKNOWN, 415 TokenType.NOTHING, 416 TokenType.NULL, 417 TokenType.NAME, 418 TokenType.TDIGEST, 419 TokenType.DYNAMIC, 420 *ENUM_TYPE_TOKENS, 421 *NESTED_TYPE_TOKENS, 422 *AGGREGATE_TYPE_TOKENS, 423 } 424 425 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 426 TokenType.BIGINT: TokenType.UBIGINT, 427 TokenType.INT: TokenType.UINT, 428 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 429 TokenType.SMALLINT: TokenType.USMALLINT, 430 TokenType.TINYINT: TokenType.UTINYINT, 431 
TokenType.DECIMAL: TokenType.UDECIMAL, 432 TokenType.DOUBLE: TokenType.UDOUBLE, 433 } 434 435 SUBQUERY_PREDICATES = { 436 TokenType.ANY: exp.Any, 437 TokenType.ALL: exp.All, 438 TokenType.EXISTS: exp.Exists, 439 TokenType.SOME: exp.Any, 440 } 441 442 RESERVED_TOKENS = { 443 *Tokenizer.SINGLE_TOKENS.values(), 444 TokenType.SELECT, 445 } - {TokenType.IDENTIFIER} 446 447 DB_CREATABLES = { 448 TokenType.DATABASE, 449 TokenType.DICTIONARY, 450 TokenType.FILE_FORMAT, 451 TokenType.MODEL, 452 TokenType.NAMESPACE, 453 TokenType.SCHEMA, 454 TokenType.SEQUENCE, 455 TokenType.SINK, 456 TokenType.SOURCE, 457 TokenType.STAGE, 458 TokenType.STORAGE_INTEGRATION, 459 TokenType.STREAMLIT, 460 TokenType.TABLE, 461 TokenType.TAG, 462 TokenType.VIEW, 463 TokenType.WAREHOUSE, 464 } 465 466 CREATABLES = { 467 TokenType.COLUMN, 468 TokenType.CONSTRAINT, 469 TokenType.FOREIGN_KEY, 470 TokenType.FUNCTION, 471 TokenType.INDEX, 472 TokenType.PROCEDURE, 473 *DB_CREATABLES, 474 } 475 476 ALTERABLES = { 477 TokenType.INDEX, 478 TokenType.TABLE, 479 TokenType.VIEW, 480 } 481 482 # Tokens that can represent identifiers 483 ID_VAR_TOKENS = { 484 TokenType.ALL, 485 TokenType.ATTACH, 486 TokenType.VAR, 487 TokenType.ANTI, 488 TokenType.APPLY, 489 TokenType.ASC, 490 TokenType.ASOF, 491 TokenType.AUTO_INCREMENT, 492 TokenType.BEGIN, 493 TokenType.BPCHAR, 494 TokenType.CACHE, 495 TokenType.CASE, 496 TokenType.COLLATE, 497 TokenType.COMMAND, 498 TokenType.COMMENT, 499 TokenType.COMMIT, 500 TokenType.CONSTRAINT, 501 TokenType.COPY, 502 TokenType.CUBE, 503 TokenType.CURRENT_SCHEMA, 504 TokenType.DEFAULT, 505 TokenType.DELETE, 506 TokenType.DESC, 507 TokenType.DESCRIBE, 508 TokenType.DETACH, 509 TokenType.DICTIONARY, 510 TokenType.DIV, 511 TokenType.END, 512 TokenType.EXECUTE, 513 TokenType.EXPORT, 514 TokenType.ESCAPE, 515 TokenType.FALSE, 516 TokenType.FIRST, 517 TokenType.FILTER, 518 TokenType.FINAL, 519 TokenType.FORMAT, 520 TokenType.FULL, 521 TokenType.GET, 522 TokenType.IDENTIFIER, 523 TokenType.IS, 
524 TokenType.ISNULL, 525 TokenType.INTERVAL, 526 TokenType.KEEP, 527 TokenType.KILL, 528 TokenType.LEFT, 529 TokenType.LIMIT, 530 TokenType.LOAD, 531 TokenType.MERGE, 532 TokenType.NATURAL, 533 TokenType.NEXT, 534 TokenType.OFFSET, 535 TokenType.OPERATOR, 536 TokenType.ORDINALITY, 537 TokenType.OVERLAPS, 538 TokenType.OVERWRITE, 539 TokenType.PARTITION, 540 TokenType.PERCENT, 541 TokenType.PIVOT, 542 TokenType.PRAGMA, 543 TokenType.PUT, 544 TokenType.RANGE, 545 TokenType.RECURSIVE, 546 TokenType.REFERENCES, 547 TokenType.REFRESH, 548 TokenType.RENAME, 549 TokenType.REPLACE, 550 TokenType.RIGHT, 551 TokenType.ROLLUP, 552 TokenType.ROW, 553 TokenType.ROWS, 554 TokenType.SEMI, 555 TokenType.SET, 556 TokenType.SETTINGS, 557 TokenType.SHOW, 558 TokenType.TEMPORARY, 559 TokenType.TOP, 560 TokenType.TRUE, 561 TokenType.TRUNCATE, 562 TokenType.UNIQUE, 563 TokenType.UNNEST, 564 TokenType.UNPIVOT, 565 TokenType.UPDATE, 566 TokenType.USE, 567 TokenType.VOLATILE, 568 TokenType.WINDOW, 569 *CREATABLES, 570 *SUBQUERY_PREDICATES, 571 *TYPE_TOKENS, 572 *NO_PAREN_FUNCTIONS, 573 } 574 ID_VAR_TOKENS.remove(TokenType.UNION) 575 576 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 577 TokenType.ANTI, 578 TokenType.APPLY, 579 TokenType.ASOF, 580 TokenType.FULL, 581 TokenType.LEFT, 582 TokenType.LOCK, 583 TokenType.NATURAL, 584 TokenType.RIGHT, 585 TokenType.SEMI, 586 TokenType.WINDOW, 587 } 588 589 ALIAS_TOKENS = ID_VAR_TOKENS 590 591 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 592 593 ARRAY_CONSTRUCTORS = { 594 "ARRAY": exp.Array, 595 "LIST": exp.List, 596 } 597 598 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 599 600 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 601 602 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 603 604 FUNC_TOKENS = { 605 TokenType.COLLATE, 606 TokenType.COMMAND, 607 TokenType.CURRENT_DATE, 608 TokenType.CURRENT_DATETIME, 609 TokenType.CURRENT_SCHEMA, 610 TokenType.CURRENT_TIMESTAMP, 611 TokenType.CURRENT_TIME, 612 TokenType.CURRENT_USER, 613 
TokenType.FILTER, 614 TokenType.FIRST, 615 TokenType.FORMAT, 616 TokenType.GET, 617 TokenType.GLOB, 618 TokenType.IDENTIFIER, 619 TokenType.INDEX, 620 TokenType.ISNULL, 621 TokenType.ILIKE, 622 TokenType.INSERT, 623 TokenType.LIKE, 624 TokenType.MERGE, 625 TokenType.NEXT, 626 TokenType.OFFSET, 627 TokenType.PRIMARY_KEY, 628 TokenType.RANGE, 629 TokenType.REPLACE, 630 TokenType.RLIKE, 631 TokenType.ROW, 632 TokenType.UNNEST, 633 TokenType.VAR, 634 TokenType.LEFT, 635 TokenType.RIGHT, 636 TokenType.SEQUENCE, 637 TokenType.DATE, 638 TokenType.DATETIME, 639 TokenType.TABLE, 640 TokenType.TIMESTAMP, 641 TokenType.TIMESTAMPTZ, 642 TokenType.TRUNCATE, 643 TokenType.WINDOW, 644 TokenType.XOR, 645 *TYPE_TOKENS, 646 *SUBQUERY_PREDICATES, 647 } 648 649 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 650 TokenType.AND: exp.And, 651 } 652 653 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 654 TokenType.COLON_EQ: exp.PropertyEQ, 655 } 656 657 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.OR: exp.Or, 659 } 660 661 EQUALITY = { 662 TokenType.EQ: exp.EQ, 663 TokenType.NEQ: exp.NEQ, 664 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 665 } 666 667 COMPARISON = { 668 TokenType.GT: exp.GT, 669 TokenType.GTE: exp.GTE, 670 TokenType.LT: exp.LT, 671 TokenType.LTE: exp.LTE, 672 } 673 674 BITWISE = { 675 TokenType.AMP: exp.BitwiseAnd, 676 TokenType.CARET: exp.BitwiseXor, 677 TokenType.PIPE: exp.BitwiseOr, 678 } 679 680 TERM = { 681 TokenType.DASH: exp.Sub, 682 TokenType.PLUS: exp.Add, 683 TokenType.MOD: exp.Mod, 684 TokenType.COLLATE: exp.Collate, 685 } 686 687 FACTOR = { 688 TokenType.DIV: exp.IntDiv, 689 TokenType.LR_ARROW: exp.Distance, 690 TokenType.SLASH: exp.Div, 691 TokenType.STAR: exp.Mul, 692 } 693 694 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 695 696 TIMES = { 697 TokenType.TIME, 698 TokenType.TIMETZ, 699 } 700 701 TIMESTAMPS = { 702 TokenType.TIMESTAMP, 703 TokenType.TIMESTAMPNTZ, 704 TokenType.TIMESTAMPTZ, 705 
TokenType.TIMESTAMPLTZ, 706 *TIMES, 707 } 708 709 SET_OPERATIONS = { 710 TokenType.UNION, 711 TokenType.INTERSECT, 712 TokenType.EXCEPT, 713 } 714 715 JOIN_METHODS = { 716 TokenType.ASOF, 717 TokenType.NATURAL, 718 TokenType.POSITIONAL, 719 } 720 721 JOIN_SIDES = { 722 TokenType.LEFT, 723 TokenType.RIGHT, 724 TokenType.FULL, 725 } 726 727 JOIN_KINDS = { 728 TokenType.ANTI, 729 TokenType.CROSS, 730 TokenType.INNER, 731 TokenType.OUTER, 732 TokenType.SEMI, 733 TokenType.STRAIGHT_JOIN, 734 } 735 736 JOIN_HINTS: t.Set[str] = set() 737 738 LAMBDAS = { 739 TokenType.ARROW: lambda self, expressions: self.expression( 740 exp.Lambda, 741 this=self._replace_lambda( 742 self._parse_assignment(), 743 expressions, 744 ), 745 expressions=expressions, 746 ), 747 TokenType.FARROW: lambda self, expressions: self.expression( 748 exp.Kwarg, 749 this=exp.var(expressions[0].name), 750 expression=self._parse_assignment(), 751 ), 752 } 753 754 COLUMN_OPERATORS = { 755 TokenType.DOT: None, 756 TokenType.DOTCOLON: lambda self, this, to: self.expression( 757 exp.JSONCast, 758 this=this, 759 to=to, 760 ), 761 TokenType.DCOLON: lambda self, this, to: self.expression( 762 exp.Cast if self.STRICT_CAST else exp.TryCast, 763 this=this, 764 to=to, 765 ), 766 TokenType.ARROW: lambda self, this, path: self.expression( 767 exp.JSONExtract, 768 this=this, 769 expression=self.dialect.to_json_path(path), 770 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 771 ), 772 TokenType.DARROW: lambda self, this, path: self.expression( 773 exp.JSONExtractScalar, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 779 exp.JSONBExtract, 780 this=this, 781 expression=path, 782 ), 783 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtractScalar, 785 this=this, 786 expression=path, 787 ), 788 TokenType.PLACEHOLDER: lambda self, this, key: 
self.expression( 789 exp.JSONBContains, 790 this=this, 791 expression=key, 792 ), 793 } 794 795 EXPRESSION_PARSERS = { 796 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 797 exp.Column: lambda self: self._parse_column(), 798 exp.Condition: lambda self: self._parse_assignment(), 799 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 800 exp.Expression: lambda self: self._parse_expression(), 801 exp.From: lambda self: self._parse_from(joins=True), 802 exp.Group: lambda self: self._parse_group(), 803 exp.Having: lambda self: self._parse_having(), 804 exp.Hint: lambda self: self._parse_hint_body(), 805 exp.Identifier: lambda self: self._parse_id_var(), 806 exp.Join: lambda self: self._parse_join(), 807 exp.Lambda: lambda self: self._parse_lambda(), 808 exp.Lateral: lambda self: self._parse_lateral(), 809 exp.Limit: lambda self: self._parse_limit(), 810 exp.Offset: lambda self: self._parse_offset(), 811 exp.Order: lambda self: self._parse_order(), 812 exp.Ordered: lambda self: self._parse_ordered(), 813 exp.Properties: lambda self: self._parse_properties(), 814 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 815 exp.Qualify: lambda self: self._parse_qualify(), 816 exp.Returning: lambda self: self._parse_returning(), 817 exp.Select: lambda self: self._parse_select(), 818 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 819 exp.Table: lambda self: self._parse_table_parts(), 820 exp.TableAlias: lambda self: self._parse_table_alias(), 821 exp.Tuple: lambda self: self._parse_value(values=False), 822 exp.Whens: lambda self: self._parse_when_matched(), 823 exp.Where: lambda self: self._parse_where(), 824 exp.Window: lambda self: self._parse_named_window(), 825 exp.With: lambda self: self._parse_with(), 826 "JOIN_TYPE": lambda self: self._parse_join_parts(), 827 } 828 829 STATEMENT_PARSERS = { 830 TokenType.ALTER: lambda self: self._parse_alter(), 831 TokenType.ANALYZE: 
lambda self: self._parse_analyze(), 832 TokenType.BEGIN: lambda self: self._parse_transaction(), 833 TokenType.CACHE: lambda self: self._parse_cache(), 834 TokenType.COMMENT: lambda self: self._parse_comment(), 835 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 836 TokenType.COPY: lambda self: self._parse_copy(), 837 TokenType.CREATE: lambda self: self._parse_create(), 838 TokenType.DELETE: lambda self: self._parse_delete(), 839 TokenType.DESC: lambda self: self._parse_describe(), 840 TokenType.DESCRIBE: lambda self: self._parse_describe(), 841 TokenType.DROP: lambda self: self._parse_drop(), 842 TokenType.GRANT: lambda self: self._parse_grant(), 843 TokenType.INSERT: lambda self: self._parse_insert(), 844 TokenType.KILL: lambda self: self._parse_kill(), 845 TokenType.LOAD: lambda self: self._parse_load(), 846 TokenType.MERGE: lambda self: self._parse_merge(), 847 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 848 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 849 TokenType.REFRESH: lambda self: self._parse_refresh(), 850 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 851 TokenType.SET: lambda self: self._parse_set(), 852 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 853 TokenType.UNCACHE: lambda self: self._parse_uncache(), 854 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 855 TokenType.UPDATE: lambda self: self._parse_update(), 856 TokenType.USE: lambda self: self._parse_use(), 857 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 858 } 859 860 UNARY_PARSERS = { 861 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 862 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 863 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 864 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 865 
TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 866 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 867 } 868 869 STRING_PARSERS = { 870 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 871 exp.RawString, this=token.text 872 ), 873 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 874 exp.National, this=token.text 875 ), 876 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 877 TokenType.STRING: lambda self, token: self.expression( 878 exp.Literal, this=token.text, is_string=True 879 ), 880 TokenType.UNICODE_STRING: lambda self, token: self.expression( 881 exp.UnicodeString, 882 this=token.text, 883 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 884 ), 885 } 886 887 NUMERIC_PARSERS = { 888 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 889 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 890 TokenType.HEX_STRING: lambda self, token: self.expression( 891 exp.HexString, 892 this=token.text, 893 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 894 ), 895 TokenType.NUMBER: lambda self, token: self.expression( 896 exp.Literal, this=token.text, is_string=False 897 ), 898 } 899 900 PRIMARY_PARSERS = { 901 **STRING_PARSERS, 902 **NUMERIC_PARSERS, 903 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 904 TokenType.NULL: lambda self, _: self.expression(exp.Null), 905 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 906 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 907 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 908 TokenType.STAR: lambda self, _: self._parse_star_ops(), 909 } 910 911 PLACEHOLDER_PARSERS = { 912 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 913 TokenType.PARAMETER: 
lambda self: self._parse_parameter(), 914 TokenType.COLON: lambda self: ( 915 self.expression(exp.Placeholder, this=self._prev.text) 916 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 917 else None 918 ), 919 } 920 921 RANGE_PARSERS = { 922 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 923 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 924 TokenType.GLOB: binary_range_parser(exp.Glob), 925 TokenType.ILIKE: binary_range_parser(exp.ILike), 926 TokenType.IN: lambda self, this: self._parse_in(this), 927 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 928 TokenType.IS: lambda self, this: self._parse_is(this), 929 TokenType.LIKE: binary_range_parser(exp.Like), 930 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 931 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 932 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 933 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 934 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 935 } 936 937 PIPE_SYNTAX_TRANSFORM_PARSERS = { 938 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 939 "AS": lambda self, query: self._build_pipe_cte( 940 query, [exp.Star()], self._parse_table_alias() 941 ), 942 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 943 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 944 "ORDER BY": lambda self, query: query.order_by( 945 self._parse_order(), append=False, copy=False 946 ), 947 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 948 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 949 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 950 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 951 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 952 } 953 954 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 955 "ALLOWED_VALUES": lambda 
self: self.expression( 956 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 957 ), 958 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 959 "AUTO": lambda self: self._parse_auto_property(), 960 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 961 "BACKUP": lambda self: self.expression( 962 exp.BackupProperty, this=self._parse_var(any_token=True) 963 ), 964 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 965 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 966 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 967 "CHECKSUM": lambda self: self._parse_checksum(), 968 "CLUSTER BY": lambda self: self._parse_cluster(), 969 "CLUSTERED": lambda self: self._parse_clustered_by(), 970 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 971 exp.CollateProperty, **kwargs 972 ), 973 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 974 "CONTAINS": lambda self: self._parse_contains_property(), 975 "COPY": lambda self: self._parse_copy_property(), 976 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 977 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 978 "DEFINER": lambda self: self._parse_definer(), 979 "DETERMINISTIC": lambda self: self.expression( 980 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 981 ), 982 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 983 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 984 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 985 "DISTKEY": lambda self: self._parse_distkey(), 986 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 987 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 988 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 989 
"ENVIRONMENT": lambda self: self.expression( 990 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 991 ), 992 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 993 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 994 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 995 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 996 "FREESPACE": lambda self: self._parse_freespace(), 997 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 998 "HEAP": lambda self: self.expression(exp.HeapProperty), 999 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1000 "IMMUTABLE": lambda self: self.expression( 1001 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1002 ), 1003 "INHERITS": lambda self: self.expression( 1004 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1005 ), 1006 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1007 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1008 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1009 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1010 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1011 "LIKE": lambda self: self._parse_create_like(), 1012 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1013 "LOCK": lambda self: self._parse_locking(), 1014 "LOCKING": lambda self: self._parse_locking(), 1015 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1016 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1017 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1018 "MODIFIES": lambda self: self._parse_modifies_property(), 1019 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1020 "NO": lambda self: self._parse_no_property(), 1021 
"ON": lambda self: self._parse_on_property(), 1022 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1023 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1024 "PARTITION": lambda self: self._parse_partitioned_of(), 1025 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1026 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1027 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1028 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1029 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1030 "READS": lambda self: self._parse_reads_property(), 1031 "REMOTE": lambda self: self._parse_remote_with_connection(), 1032 "RETURNS": lambda self: self._parse_returns(), 1033 "STRICT": lambda self: self.expression(exp.StrictProperty), 1034 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1035 "ROW": lambda self: self._parse_row(), 1036 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1037 "SAMPLE": lambda self: self.expression( 1038 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1039 ), 1040 "SECURE": lambda self: self.expression(exp.SecureProperty), 1041 "SECURITY": lambda self: self._parse_security(), 1042 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1043 "SETTINGS": lambda self: self._parse_settings_property(), 1044 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1045 "SORTKEY": lambda self: self._parse_sortkey(), 1046 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1047 "STABLE": lambda self: self.expression( 1048 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1049 ), 1050 "STORED": lambda self: self._parse_stored(), 1051 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1052 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1053 "TEMP": lambda self: 
self.expression(exp.TemporaryProperty), 1054 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1055 "TO": lambda self: self._parse_to_table(), 1056 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1057 "TRANSFORM": lambda self: self.expression( 1058 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1059 ), 1060 "TTL": lambda self: self._parse_ttl(), 1061 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1062 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1063 "VOLATILE": lambda self: self._parse_volatile_property(), 1064 "WITH": lambda self: self._parse_with_property(), 1065 } 1066 1067 CONSTRAINT_PARSERS = { 1068 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1069 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1070 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1071 "CHARACTER SET": lambda self: self.expression( 1072 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1073 ), 1074 "CHECK": lambda self: self.expression( 1075 exp.CheckColumnConstraint, 1076 this=self._parse_wrapped(self._parse_assignment), 1077 enforced=self._match_text_seq("ENFORCED"), 1078 ), 1079 "COLLATE": lambda self: self.expression( 1080 exp.CollateColumnConstraint, 1081 this=self._parse_identifier() or self._parse_column(), 1082 ), 1083 "COMMENT": lambda self: self.expression( 1084 exp.CommentColumnConstraint, this=self._parse_string() 1085 ), 1086 "COMPRESS": lambda self: self._parse_compress(), 1087 "CLUSTERED": lambda self: self.expression( 1088 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1089 ), 1090 "NONCLUSTERED": lambda self: self.expression( 1091 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1092 ), 1093 "DEFAULT": lambda self: self.expression( 1094 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1095 
), 1096 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1097 "EPHEMERAL": lambda self: self.expression( 1098 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1099 ), 1100 "EXCLUDE": lambda self: self.expression( 1101 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1102 ), 1103 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1104 "FORMAT": lambda self: self.expression( 1105 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1106 ), 1107 "GENERATED": lambda self: self._parse_generated_as_identity(), 1108 "IDENTITY": lambda self: self._parse_auto_increment(), 1109 "INLINE": lambda self: self._parse_inline(), 1110 "LIKE": lambda self: self._parse_create_like(), 1111 "NOT": lambda self: self._parse_not_constraint(), 1112 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1113 "ON": lambda self: ( 1114 self._match(TokenType.UPDATE) 1115 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1116 ) 1117 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1118 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1119 "PERIOD": lambda self: self._parse_period_for_system_time(), 1120 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1121 "REFERENCES": lambda self: self._parse_references(match=False), 1122 "TITLE": lambda self: self.expression( 1123 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1124 ), 1125 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1126 "UNIQUE": lambda self: self._parse_unique(), 1127 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1128 "WATERMARK": lambda self: self.expression( 1129 exp.WatermarkColumnConstraint, 1130 this=self._match(TokenType.FOR) and self._parse_column(), 1131 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1132 ), 1133 "WITH": 
lambda self: self.expression( 1134 exp.Properties, expressions=self._parse_wrapped_properties() 1135 ), 1136 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1137 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1138 } 1139 1140 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1141 klass = ( 1142 exp.PartitionedByBucket 1143 if self._prev.text.upper() == "BUCKET" 1144 else exp.PartitionByTruncate 1145 ) 1146 1147 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1148 this, expression = seq_get(args, 0), seq_get(args, 1) 1149 1150 if isinstance(this, exp.Literal): 1151 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1152 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1153 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1154 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1155 # 1156 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1157 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1158 this, expression = expression, this 1159 1160 return self.expression(klass, this=this, expression=expression) 1161 1162 ALTER_PARSERS = { 1163 "ADD": lambda self: self._parse_alter_table_add(), 1164 "AS": lambda self: self._parse_select(), 1165 "ALTER": lambda self: self._parse_alter_table_alter(), 1166 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1167 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1168 "DROP": lambda self: self._parse_alter_table_drop(), 1169 "RENAME": lambda self: self._parse_alter_table_rename(), 1170 "SET": lambda self: self._parse_alter_table_set(), 1171 "SWAP": lambda self: 
self.expression( 1172 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1173 ), 1174 } 1175 1176 ALTER_ALTER_PARSERS = { 1177 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1178 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1179 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1180 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1181 } 1182 1183 SCHEMA_UNNAMED_CONSTRAINTS = { 1184 "CHECK", 1185 "EXCLUDE", 1186 "FOREIGN KEY", 1187 "LIKE", 1188 "PERIOD", 1189 "PRIMARY KEY", 1190 "UNIQUE", 1191 "WATERMARK", 1192 "BUCKET", 1193 "TRUNCATE", 1194 } 1195 1196 NO_PAREN_FUNCTION_PARSERS = { 1197 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1198 "CASE": lambda self: self._parse_case(), 1199 "CONNECT_BY_ROOT": lambda self: self.expression( 1200 exp.ConnectByRoot, this=self._parse_column() 1201 ), 1202 "IF": lambda self: self._parse_if(), 1203 } 1204 1205 INVALID_FUNC_NAME_TOKENS = { 1206 TokenType.IDENTIFIER, 1207 TokenType.STRING, 1208 } 1209 1210 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1211 1212 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1213 1214 FUNCTION_PARSERS = { 1215 **{ 1216 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1217 }, 1218 **{ 1219 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1220 }, 1221 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1222 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1223 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1224 "DECODE": lambda self: self._parse_decode(), 1225 "EXTRACT": lambda self: self._parse_extract(), 1226 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1227 "GAP_FILL": lambda self: self._parse_gap_fill(), 1228 "JSON_OBJECT": lambda self: self._parse_json_object(), 1229 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1230 "JSON_TABLE": lambda 
self: self._parse_json_table(), 1231 "MATCH": lambda self: self._parse_match_against(), 1232 "NORMALIZE": lambda self: self._parse_normalize(), 1233 "OPENJSON": lambda self: self._parse_open_json(), 1234 "OVERLAY": lambda self: self._parse_overlay(), 1235 "POSITION": lambda self: self._parse_position(), 1236 "PREDICT": lambda self: self._parse_predict(), 1237 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1238 "STRING_AGG": lambda self: self._parse_string_agg(), 1239 "SUBSTRING": lambda self: self._parse_substring(), 1240 "TRIM": lambda self: self._parse_trim(), 1241 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1242 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1243 "XMLELEMENT": lambda self: self.expression( 1244 exp.XMLElement, 1245 this=self._match_text_seq("NAME") and self._parse_id_var(), 1246 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1247 ), 1248 "XMLTABLE": lambda self: self._parse_xml_table(), 1249 } 1250 1251 QUERY_MODIFIER_PARSERS = { 1252 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1253 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1254 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1255 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1256 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1257 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1258 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1259 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1260 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1261 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1262 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1263 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1264 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1265 
TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1266 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1267 TokenType.CLUSTER_BY: lambda self: ( 1268 "cluster", 1269 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1270 ), 1271 TokenType.DISTRIBUTE_BY: lambda self: ( 1272 "distribute", 1273 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1274 ), 1275 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1276 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1277 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1278 } 1279 1280 SET_PARSERS = { 1281 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1282 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1283 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1284 "TRANSACTION": lambda self: self._parse_set_transaction(), 1285 } 1286 1287 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1288 1289 TYPE_LITERAL_PARSERS = { 1290 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1291 } 1292 1293 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1294 1295 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1296 1297 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1298 1299 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1300 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1301 "ISOLATION": ( 1302 ("LEVEL", "REPEATABLE", "READ"), 1303 ("LEVEL", "READ", "COMMITTED"), 1304 ("LEVEL", "READ", "UNCOMITTED"), 1305 ("LEVEL", "SERIALIZABLE"), 1306 ), 1307 "READ": ("WRITE", "ONLY"), 1308 } 1309 1310 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1311 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1312 ) 1313 CONFLICT_ACTIONS["DO"] = 
("NOTHING", "UPDATE") 1314 1315 CREATE_SEQUENCE: OPTIONS_TYPE = { 1316 "SCALE": ("EXTEND", "NOEXTEND"), 1317 "SHARD": ("EXTEND", "NOEXTEND"), 1318 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1319 **dict.fromkeys( 1320 ( 1321 "SESSION", 1322 "GLOBAL", 1323 "KEEP", 1324 "NOKEEP", 1325 "ORDER", 1326 "NOORDER", 1327 "NOCACHE", 1328 "CYCLE", 1329 "NOCYCLE", 1330 "NOMINVALUE", 1331 "NOMAXVALUE", 1332 "NOSCALE", 1333 "NOSHARD", 1334 ), 1335 tuple(), 1336 ), 1337 } 1338 1339 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1340 1341 USABLES: OPTIONS_TYPE = dict.fromkeys( 1342 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1343 ) 1344 1345 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1346 1347 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1348 "TYPE": ("EVOLUTION",), 1349 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1350 } 1351 1352 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1353 1354 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1355 1356 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1357 "NOT": ("ENFORCED",), 1358 "MATCH": ( 1359 "FULL", 1360 "PARTIAL", 1361 "SIMPLE", 1362 ), 1363 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1364 "USING": ( 1365 "BTREE", 1366 "HASH", 1367 ), 1368 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1369 } 1370 1371 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1372 "NO": ("OTHERS",), 1373 "CURRENT": ("ROW",), 1374 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1375 } 1376 1377 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1378 1379 CLONE_KEYWORDS = {"CLONE", "COPY"} 1380 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1381 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1382 1383 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1384 1385 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1386 1387 TABLE_INDEX_HINT_TOKENS = 
{TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1388 1389 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1390 1391 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1392 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1393 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1394 1395 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1396 1397 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1398 1399 ADD_CONSTRAINT_TOKENS = { 1400 TokenType.CONSTRAINT, 1401 TokenType.FOREIGN_KEY, 1402 TokenType.INDEX, 1403 TokenType.KEY, 1404 TokenType.PRIMARY_KEY, 1405 TokenType.UNIQUE, 1406 } 1407 1408 DISTINCT_TOKENS = {TokenType.DISTINCT} 1409 1410 NULL_TOKENS = {TokenType.NULL} 1411 1412 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1413 1414 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1415 1416 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1417 1418 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1419 1420 ODBC_DATETIME_LITERALS = { 1421 "d": exp.Date, 1422 "t": exp.Time, 1423 "ts": exp.Timestamp, 1424 } 1425 1426 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1427 1428 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1429 1430 # The style options for the DESCRIBE statement 1431 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1432 1433 # The style options for the ANALYZE statement 1434 ANALYZE_STYLES = { 1435 "BUFFER_USAGE_LIMIT", 1436 "FULL", 1437 "LOCAL", 1438 "NO_WRITE_TO_BINLOG", 1439 "SAMPLE", 1440 "SKIP_LOCKED", 1441 "VERBOSE", 1442 } 1443 1444 ANALYZE_EXPRESSION_PARSERS = { 1445 "ALL": lambda self: self._parse_analyze_columns(), 1446 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1447 "DELETE": lambda self: self._parse_analyze_delete(), 1448 "DROP": lambda self: self._parse_analyze_histogram(), 1449 
"ESTIMATE": lambda self: self._parse_analyze_statistics(), 1450 "LIST": lambda self: self._parse_analyze_list(), 1451 "PREDICATE": lambda self: self._parse_analyze_columns(), 1452 "UPDATE": lambda self: self._parse_analyze_histogram(), 1453 "VALIDATE": lambda self: self._parse_analyze_validate(), 1454 } 1455 1456 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1457 1458 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1459 1460 OPERATION_MODIFIERS: t.Set[str] = set() 1461 1462 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1463 1464 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1465 1466 STRICT_CAST = True 1467 1468 PREFIXED_PIVOT_COLUMNS = False 1469 IDENTIFY_PIVOT_STRINGS = False 1470 1471 LOG_DEFAULTS_TO_LN = False 1472 1473 # Whether the table sample clause expects CSV syntax 1474 TABLESAMPLE_CSV = False 1475 1476 # The default method used for table sampling 1477 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1478 1479 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1480 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1481 1482 # Whether the TRIM function expects the characters to trim as its first argument 1483 TRIM_PATTERN_FIRST = False 1484 1485 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1486 STRING_ALIASES = False 1487 1488 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1489 MODIFIERS_ATTACHED_TO_SET_OP = True 1490 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1491 1492 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1493 NO_PAREN_IF_COMMANDS = True 1494 1495 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres) 1496 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1497 1498 # Whether the `:` operator is used to extract a value from a VARIANT column 1499 COLON_IS_VARIANT_EXTRACT = False 1500 1501 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1502 # If this is True and '(' is not found, the keyword will be treated as an identifier 1503 VALUES_FOLLOWED_BY_PAREN = True 1504 1505 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1506 SUPPORTS_IMPLICIT_UNNEST = False 1507 1508 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1509 INTERVAL_SPANS = True 1510 1511 # Whether a PARTITION clause can follow a table reference 1512 SUPPORTS_PARTITION_SELECTION = False 1513 1514 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1515 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1516 1517 # Whether the 'AS' keyword is optional in the CTE definition syntax 1518 OPTIONAL_ALIAS_TOKEN_CTE = True 1519 1520 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1521 ALTER_RENAME_REQUIRES_COLUMN = True 1522 1523 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1524 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1525 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1526 # as BigQuery, where all joins have the same precedence. 
1527 JOINS_HAVE_EQUAL_PRECEDENCE = False 1528 1529 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1530 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1531 1532 __slots__ = ( 1533 "error_level", 1534 "error_message_context", 1535 "max_errors", 1536 "dialect", 1537 "sql", 1538 "errors", 1539 "_tokens", 1540 "_index", 1541 "_curr", 1542 "_next", 1543 "_prev", 1544 "_prev_comments", 1545 "_pipe_cte_counter", 1546 ) 1547 1548 # Autofilled 1549 SHOW_TRIE: t.Dict = {} 1550 SET_TRIE: t.Dict = {} 1551 1552 def __init__( 1553 self, 1554 error_level: t.Optional[ErrorLevel] = None, 1555 error_message_context: int = 100, 1556 max_errors: int = 3, 1557 dialect: DialectType = None, 1558 ): 1559 from sqlglot.dialects import Dialect 1560 1561 self.error_level = error_level or ErrorLevel.IMMEDIATE 1562 self.error_message_context = error_message_context 1563 self.max_errors = max_errors 1564 self.dialect = Dialect.get_or_raise(dialect) 1565 self.reset() 1566 1567 def reset(self): 1568 self.sql = "" 1569 self.errors = [] 1570 self._tokens = [] 1571 self._index = 0 1572 self._curr = None 1573 self._next = None 1574 self._prev = None 1575 self._prev_comments = None 1576 self._pipe_cte_counter = 0 1577 1578 def parse( 1579 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1580 ) -> t.List[t.Optional[exp.Expression]]: 1581 """ 1582 Parses a list of tokens and returns a list of syntax trees, one tree 1583 per parsed SQL statement. 1584 1585 Args: 1586 raw_tokens: The list of tokens. 1587 sql: The original SQL string, used to produce helpful debug messages. 1588 1589 Returns: 1590 The list of the produced syntax trees. 
1591 """ 1592 return self._parse( 1593 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1594 ) 1595 1596 def parse_into( 1597 self, 1598 expression_types: exp.IntoType, 1599 raw_tokens: t.List[Token], 1600 sql: t.Optional[str] = None, 1601 ) -> t.List[t.Optional[exp.Expression]]: 1602 """ 1603 Parses a list of tokens into a given Expression type. If a collection of Expression 1604 types is given instead, this method will try to parse the token list into each one 1605 of them, stopping at the first for which the parsing succeeds. 1606 1607 Args: 1608 expression_types: The expression type(s) to try and parse the token list into. 1609 raw_tokens: The list of tokens. 1610 sql: The original SQL string, used to produce helpful debug messages. 1611 1612 Returns: 1613 The target Expression. 1614 """ 1615 errors = [] 1616 for expression_type in ensure_list(expression_types): 1617 parser = self.EXPRESSION_PARSERS.get(expression_type) 1618 if not parser: 1619 raise TypeError(f"No parser registered for {expression_type}") 1620 1621 try: 1622 return self._parse(parser, raw_tokens, sql) 1623 except ParseError as e: 1624 e.errors[0]["into_expression"] = expression_type 1625 errors.append(e) 1626 1627 raise ParseError( 1628 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1629 errors=merge_errors(errors), 1630 ) from errors[-1] 1631 1632 def _parse( 1633 self, 1634 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1635 raw_tokens: t.List[Token], 1636 sql: t.Optional[str] = None, 1637 ) -> t.List[t.Optional[exp.Expression]]: 1638 self.reset() 1639 self.sql = sql or "" 1640 1641 total = len(raw_tokens) 1642 chunks: t.List[t.List[Token]] = [[]] 1643 1644 for i, token in enumerate(raw_tokens): 1645 if token.token_type == TokenType.SEMICOLON: 1646 if token.comments: 1647 chunks.append([token]) 1648 1649 if i < total - 1: 1650 chunks.append([]) 1651 else: 1652 chunks[-1].append(token) 1653 1654 expressions = [] 1655 1656 for 
tokens in chunks: 1657 self._index = -1 1658 self._tokens = tokens 1659 self._advance() 1660 1661 expressions.append(parse_method(self)) 1662 1663 if self._index < len(self._tokens): 1664 self.raise_error("Invalid expression / Unexpected token") 1665 1666 self.check_errors() 1667 1668 return expressions 1669 1670 def check_errors(self) -> None: 1671 """Logs or raises any found errors, depending on the chosen error level setting.""" 1672 if self.error_level == ErrorLevel.WARN: 1673 for error in self.errors: 1674 logger.error(str(error)) 1675 elif self.error_level == ErrorLevel.RAISE and self.errors: 1676 raise ParseError( 1677 concat_messages(self.errors, self.max_errors), 1678 errors=merge_errors(self.errors), 1679 ) 1680 1681 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1682 """ 1683 Appends an error in the list of recorded errors or raises it, depending on the chosen 1684 error level setting. 1685 """ 1686 token = token or self._curr or self._prev or Token.string("") 1687 start = token.start 1688 end = token.end + 1 1689 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1690 highlight = self.sql[start:end] 1691 end_context = self.sql[end : end + self.error_message_context] 1692 1693 error = ParseError.new( 1694 f"{message}. Line {token.line}, Col: {token.col}.\n" 1695 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1696 description=message, 1697 line=token.line, 1698 col=token.col, 1699 start_context=start_context, 1700 highlight=highlight, 1701 end_context=end_context, 1702 ) 1703 1704 if self.error_level == ErrorLevel.IMMEDIATE: 1705 raise error 1706 1707 self.errors.append(error) 1708 1709 def expression( 1710 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1711 ) -> E: 1712 """ 1713 Creates a new, validated Expression. 1714 1715 Args: 1716 exp_class: The expression class to instantiate. 1717 comments: An optional list of comments to attach to the expression. 
1718 kwargs: The arguments to set for the expression along with their respective values. 1719 1720 Returns: 1721 The target expression. 1722 """ 1723 instance = exp_class(**kwargs) 1724 instance.add_comments(comments) if comments else self._add_comments(instance) 1725 return self.validate_expression(instance) 1726 1727 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1728 if expression and self._prev_comments: 1729 expression.add_comments(self._prev_comments) 1730 self._prev_comments = None 1731 1732 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1733 """ 1734 Validates an Expression, making sure that all its mandatory arguments are set. 1735 1736 Args: 1737 expression: The expression to validate. 1738 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1739 1740 Returns: 1741 The validated expression. 1742 """ 1743 if self.error_level != ErrorLevel.IGNORE: 1744 for error_message in expression.error_messages(args): 1745 self.raise_error(error_message) 1746 1747 return expression 1748 1749 def _find_sql(self, start: Token, end: Token) -> str: 1750 return self.sql[start.start : end.end + 1] 1751 1752 def _is_connected(self) -> bool: 1753 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1754 1755 def _advance(self, times: int = 1) -> None: 1756 self._index += times 1757 self._curr = seq_get(self._tokens, self._index) 1758 self._next = seq_get(self._tokens, self._index + 1) 1759 1760 if self._index > 0: 1761 self._prev = self._tokens[self._index - 1] 1762 self._prev_comments = self._prev.comments 1763 else: 1764 self._prev = None 1765 self._prev_comments = None 1766 1767 def _retreat(self, index: int) -> None: 1768 if index != self._index: 1769 self._advance(index - self._index) 1770 1771 def _warn_unsupported(self) -> None: 1772 if len(self._tokens) <= 1: 1773 return 1774 1775 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1776 # interested in emitting a warning for the one being currently processed. 1777 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1778 1779 logger.warning( 1780 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1781 ) 1782 1783 def _parse_command(self) -> exp.Command: 1784 self._warn_unsupported() 1785 return self.expression( 1786 exp.Command, 1787 comments=self._prev_comments, 1788 this=self._prev.text.upper(), 1789 expression=self._parse_string(), 1790 ) 1791 1792 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1793 """ 1794 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1795 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1796 solve this by setting & resetting the parser state accordingly 1797 """ 1798 index = self._index 1799 error_level = self.error_level 1800 1801 self.error_level = ErrorLevel.IMMEDIATE 1802 try: 1803 this = parse_method() 1804 except ParseError: 1805 this = None 1806 finally: 1807 if not this or retreat: 1808 self._retreat(index) 1809 self.error_level = error_level 1810 1811 return this 1812 1813 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1814 start = self._prev 1815 exists = self._parse_exists() if allow_exists else None 1816 1817 self._match(TokenType.ON) 1818 1819 materialized = self._match_text_seq("MATERIALIZED") 1820 kind = self._match_set(self.CREATABLES) and self._prev 1821 if not kind: 1822 return self._parse_as_command(start) 1823 1824 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1825 this = self._parse_user_defined_function(kind=kind.token_type) 1826 elif kind.token_type == TokenType.TABLE: 1827 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1828 elif kind.token_type == TokenType.COLUMN: 1829 this = self._parse_column() 1830 else: 1831 this 
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into an `exp.MergeTreeTTL` node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL entry is an expression, optionally followed by an action
            # (DELETE / RECOMPRESS <expr> / TO DISK '<name>' / TO VOLUME '<name>')
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # No action keyword: the bare expression itself is the TTL entry
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        # Optional trailing WHERE / GROUP BY clauses of the TTL specification
        where = self._parse_where()
        group = self._parse_group()

        # `GROUP BY ... SET <assignments>` — aggregates only parsed if a group exists
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement.

        Args:
            exists: True when the caller has already consumed an IF EXISTS clause.

        Returns:
            An exp.Drop node, or an exp.Command fallback when the DROP target is not
            one of the recognized CREATABLES kinds.
        """
        # The token that triggered this parser (DROP); needed for the Command fallback.
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            # Unknown target kind: re-emit the statement verbatim as a Command.
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        # NOTE(review): presumably an ON <cluster> clause (ClickHouse-style) — confirm.
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        # The keyword arguments below are evaluated left-to-right (Python call semantics),
        # so the trailing CASCADE / CONSTRAINTS / PURGE keywords are consumed in that order.
        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )
matched a statement parser 1955 start = self._prev 1956 1957 replace = ( 1958 start.token_type == TokenType.REPLACE 1959 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1960 or self._match_pair(TokenType.OR, TokenType.ALTER) 1961 ) 1962 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1963 1964 unique = self._match(TokenType.UNIQUE) 1965 1966 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1967 clustered = True 1968 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1969 "COLUMNSTORE" 1970 ): 1971 clustered = False 1972 else: 1973 clustered = None 1974 1975 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1976 self._advance() 1977 1978 properties = None 1979 create_token = self._match_set(self.CREATABLES) and self._prev 1980 1981 if not create_token: 1982 # exp.Properties.Location.POST_CREATE 1983 properties = self._parse_properties() 1984 create_token = self._match_set(self.CREATABLES) and self._prev 1985 1986 if not properties or not create_token: 1987 return self._parse_as_command(start) 1988 1989 concurrently = self._match_text_seq("CONCURRENTLY") 1990 exists = self._parse_exists(not_=True) 1991 this = None 1992 expression: t.Optional[exp.Expression] = None 1993 indexes = None 1994 no_schema_binding = None 1995 begin = None 1996 end = None 1997 clone = None 1998 1999 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2000 nonlocal properties 2001 if properties and temp_props: 2002 properties.expressions.extend(temp_props.expressions) 2003 elif temp_props: 2004 properties = temp_props 2005 2006 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2007 this = self._parse_user_defined_function(kind=create_token.token_type) 2008 2009 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2010 extend_props(self._parse_properties()) 2011 2012 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2013 
extend_props(self._parse_properties()) 2014 2015 if not expression: 2016 if self._match(TokenType.COMMAND): 2017 expression = self._parse_as_command(self._prev) 2018 else: 2019 begin = self._match(TokenType.BEGIN) 2020 return_ = self._match_text_seq("RETURN") 2021 2022 if self._match(TokenType.STRING, advance=False): 2023 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2024 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2025 expression = self._parse_string() 2026 extend_props(self._parse_properties()) 2027 else: 2028 expression = self._parse_user_defined_function_expression() 2029 2030 end = self._match_text_seq("END") 2031 2032 if return_: 2033 expression = self.expression(exp.Return, this=expression) 2034 elif create_token.token_type == TokenType.INDEX: 2035 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2036 if not self._match(TokenType.ON): 2037 index = self._parse_id_var() 2038 anonymous = False 2039 else: 2040 index = None 2041 anonymous = True 2042 2043 this = self._parse_index(index=index, anonymous=anonymous) 2044 elif create_token.token_type in self.DB_CREATABLES: 2045 table_parts = self._parse_table_parts( 2046 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2047 ) 2048 2049 # exp.Properties.Location.POST_NAME 2050 self._match(TokenType.COMMA) 2051 extend_props(self._parse_properties(before=True)) 2052 2053 this = self._parse_schema(this=table_parts) 2054 2055 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2056 extend_props(self._parse_properties()) 2057 2058 has_alias = self._match(TokenType.ALIAS) 2059 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2060 # exp.Properties.Location.POST_ALIAS 2061 extend_props(self._parse_properties()) 2062 2063 if create_token.token_type == TokenType.SEQUENCE: 2064 expression = self._parse_types() 2065 extend_props(self._parse_properties()) 
2066 else: 2067 expression = self._parse_ddl_select() 2068 2069 # Some dialects also support using a table as an alias instead of a SELECT. 2070 # Here we fallback to this as an alternative. 2071 if not expression and has_alias: 2072 expression = self._try_parse(self._parse_table_parts) 2073 2074 if create_token.token_type == TokenType.TABLE: 2075 # exp.Properties.Location.POST_EXPRESSION 2076 extend_props(self._parse_properties()) 2077 2078 indexes = [] 2079 while True: 2080 index = self._parse_index() 2081 2082 # exp.Properties.Location.POST_INDEX 2083 extend_props(self._parse_properties()) 2084 if not index: 2085 break 2086 else: 2087 self._match(TokenType.COMMA) 2088 indexes.append(index) 2089 elif create_token.token_type == TokenType.VIEW: 2090 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2091 no_schema_binding = True 2092 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2093 extend_props(self._parse_properties()) 2094 2095 shallow = self._match_text_seq("SHALLOW") 2096 2097 if self._match_texts(self.CLONE_KEYWORDS): 2098 copy = self._prev.text.lower() == "copy" 2099 clone = self.expression( 2100 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2101 ) 2102 2103 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2104 return self._parse_as_command(start) 2105 2106 create_kind_text = create_token.text.upper() 2107 return self.expression( 2108 exp.Create, 2109 this=this, 2110 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2111 replace=replace, 2112 refresh=refresh, 2113 unique=unique, 2114 expression=expression, 2115 exists=exists, 2116 properties=properties, 2117 indexes=indexes, 2118 no_schema_binding=no_schema_binding, 2119 begin=begin, 2120 end=end, 2121 clone=clone, 2122 concurrently=concurrently, 2123 clustered=clustered, 2124 ) 2125 2126 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2127 seq 
= exp.SequenceProperties() 2128 2129 options = [] 2130 index = self._index 2131 2132 while self._curr: 2133 self._match(TokenType.COMMA) 2134 if self._match_text_seq("INCREMENT"): 2135 self._match_text_seq("BY") 2136 self._match_text_seq("=") 2137 seq.set("increment", self._parse_term()) 2138 elif self._match_text_seq("MINVALUE"): 2139 seq.set("minvalue", self._parse_term()) 2140 elif self._match_text_seq("MAXVALUE"): 2141 seq.set("maxvalue", self._parse_term()) 2142 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2143 self._match_text_seq("=") 2144 seq.set("start", self._parse_term()) 2145 elif self._match_text_seq("CACHE"): 2146 # T-SQL allows empty CACHE which is initialized dynamically 2147 seq.set("cache", self._parse_number() or True) 2148 elif self._match_text_seq("OWNED", "BY"): 2149 # "OWNED BY NONE" is the default 2150 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2151 else: 2152 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2153 if opt: 2154 options.append(opt) 2155 else: 2156 break 2157 2158 seq.set("options", options if options else None) 2159 return None if self._index == index else seq 2160 2161 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2162 # only used for teradata currently 2163 self._match(TokenType.COMMA) 2164 2165 kwargs = { 2166 "no": self._match_text_seq("NO"), 2167 "dual": self._match_text_seq("DUAL"), 2168 "before": self._match_text_seq("BEFORE"), 2169 "default": self._match_text_seq("DEFAULT"), 2170 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2171 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2172 "after": self._match_text_seq("AFTER"), 2173 "minimum": self._match_texts(("MIN", "MINIMUM")), 2174 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2175 } 2176 2177 if self._match_texts(self.PROPERTY_PARSERS): 2178 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2179 try: 2180 return parser(self, **{k: 
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/DDL property.

        Tries, in order: a registered PROPERTY_PARSERS entry, a DEFAULT-prefixed entry,
        COMPOUND SORTKEY, SQL SECURITY, and finally a generic `key = value` pair. When
        no `=` follows the parsed key, the position is rolled back and sequence
        properties are attempted instead.
        """
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` form: remember the position so we can backtrack
        # if the `=` never materializes.
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)
self._match_text_seq("OUTPUTFORMAT") else None 2228 2229 return self.expression( 2230 exp.FileFormatProperty, 2231 this=( 2232 self.expression( 2233 exp.InputOutputFormat, 2234 input_format=input_format, 2235 output_format=output_format, 2236 ) 2237 if input_format or output_format 2238 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2239 ), 2240 ) 2241 2242 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2243 field = self._parse_field() 2244 if isinstance(field, exp.Identifier) and not field.quoted: 2245 field = exp.var(field) 2246 2247 return field 2248 2249 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2250 self._match(TokenType.EQ) 2251 self._match(TokenType.ALIAS) 2252 2253 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2254 2255 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2256 properties = [] 2257 while True: 2258 if before: 2259 prop = self._parse_property_before() 2260 else: 2261 prop = self._parse_property() 2262 if not prop: 2263 break 2264 for p in ensure_list(prop): 2265 properties.append(p) 2266 2267 if properties: 2268 return self.expression(exp.Properties, expressions=properties) 2269 2270 return None 2271 2272 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2273 return self.expression( 2274 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2275 ) 2276 2277 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2278 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2279 security_specifier = self._prev.text.upper() 2280 return self.expression(exp.SecurityProperty, this=security_specifier) 2281 return None 2282 2283 def _parse_settings_property(self) -> exp.SettingsProperty: 2284 return self.expression( 2285 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2286 ) 2287 2288 def _parse_volatile_property(self) -> 
    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse a T-SQL SYSTEM_VERSIONING [= ON | OFF] property, including the optional
        parenthesized options (HISTORY_TABLE, DATA_CONSISTENCY_CHECK, HISTORY_RETENTION_PERIOD).

        Args:
            with_: True when the property appeared inside a WITH (...) clause.
        """
        self._match(TokenType.EQ)
        # Default to ON; flipped below if OFF is found.
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            # Consume `option = value` pairs until the closing paren; unknown options
            # are skipped by the trailing COMMA match.
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
self._match(TokenType.R_PAREN): 2343 if self._match_text_seq("FILTER_COLUMN", "="): 2344 prop.set("filter_column", self._parse_column()) 2345 elif self._match_text_seq("RETENTION_PERIOD", "="): 2346 prop.set("retention_period", self._parse_retention_period()) 2347 2348 self._match(TokenType.COMMA) 2349 2350 return prop 2351 2352 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2353 kind = "HASH" 2354 expressions: t.Optional[t.List[exp.Expression]] = None 2355 if self._match_text_seq("BY", "HASH"): 2356 expressions = self._parse_wrapped_csv(self._parse_id_var) 2357 elif self._match_text_seq("BY", "RANDOM"): 2358 kind = "RANDOM" 2359 2360 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2361 buckets: t.Optional[exp.Expression] = None 2362 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2363 buckets = self._parse_number() 2364 2365 return self.expression( 2366 exp.DistributedByProperty, 2367 expressions=expressions, 2368 kind=kind, 2369 buckets=buckets, 2370 order=self._parse_order(), 2371 ) 2372 2373 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2374 self._match_text_seq("KEY") 2375 expressions = self._parse_wrapped_id_vars() 2376 return self.expression(expr_type, expressions=expressions) 2377 2378 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2379 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2380 prop = self._parse_system_versioning_property(with_=True) 2381 self._match_r_paren() 2382 return prop 2383 2384 if self._match(TokenType.L_PAREN, advance=False): 2385 return self._parse_wrapped_properties() 2386 2387 if self._match_text_seq("JOURNAL"): 2388 return self._parse_withjournaltable() 2389 2390 if self._match_texts(self.VIEW_ATTRIBUTES): 2391 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2392 2393 if self._match_text_seq("DATA"): 2394 return self._parse_withdata(no=False) 2395 elif 
self._match_text_seq("NO", "DATA"): 2396 return self._parse_withdata(no=True) 2397 2398 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2399 return self._parse_serde_properties(with_=True) 2400 2401 if self._match(TokenType.SCHEMA): 2402 return self.expression( 2403 exp.WithSchemaBindingProperty, 2404 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2405 ) 2406 2407 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2408 return self.expression( 2409 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2410 ) 2411 2412 if not self._next: 2413 return None 2414 2415 return self._parse_withisolatedloading() 2416 2417 def _parse_procedure_option(self) -> exp.Expression | None: 2418 if self._match_text_seq("EXECUTE", "AS"): 2419 return self.expression( 2420 exp.ExecuteAsProperty, 2421 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2422 or self._parse_string(), 2423 ) 2424 2425 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2426 2427 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2428 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2429 self._match(TokenType.EQ) 2430 2431 user = self._parse_id_var() 2432 self._match(TokenType.PARAMETER) 2433 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2434 2435 if not user or not host: 2436 return None 2437 2438 return exp.DefinerProperty(this=f"{user}@{host}") 2439 2440 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2441 self._match(TokenType.TABLE) 2442 self._match(TokenType.EQ) 2443 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2444 2445 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2446 return self.expression(exp.LogProperty, no=no) 2447 2448 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2449 return self.expression(exp.JournalProperty, **kwargs) 2450 2451 def 
_parse_checksum(self) -> exp.ChecksumProperty: 2452 self._match(TokenType.EQ) 2453 2454 on = None 2455 if self._match(TokenType.ON): 2456 on = True 2457 elif self._match_text_seq("OFF"): 2458 on = False 2459 2460 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2461 2462 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2463 return self.expression( 2464 exp.Cluster, 2465 expressions=( 2466 self._parse_wrapped_csv(self._parse_ordered) 2467 if wrapped 2468 else self._parse_csv(self._parse_ordered) 2469 ), 2470 ) 2471 2472 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2473 self._match_text_seq("BY") 2474 2475 self._match_l_paren() 2476 expressions = self._parse_csv(self._parse_column) 2477 self._match_r_paren() 2478 2479 if self._match_text_seq("SORTED", "BY"): 2480 self._match_l_paren() 2481 sorted_by = self._parse_csv(self._parse_ordered) 2482 self._match_r_paren() 2483 else: 2484 sorted_by = None 2485 2486 self._match(TokenType.INTO) 2487 buckets = self._parse_number() 2488 self._match_text_seq("BUCKETS") 2489 2490 return self.expression( 2491 exp.ClusteredByProperty, 2492 expressions=expressions, 2493 sorted_by=sorted_by, 2494 buckets=buckets, 2495 ) 2496 2497 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2498 if not self._match_text_seq("GRANTS"): 2499 self._retreat(self._index - 1) 2500 return None 2501 2502 return self.expression(exp.CopyGrantsProperty) 2503 2504 def _parse_freespace(self) -> exp.FreespaceProperty: 2505 self._match(TokenType.EQ) 2506 return self.expression( 2507 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2508 ) 2509 2510 def _parse_mergeblockratio( 2511 self, no: bool = False, default: bool = False 2512 ) -> exp.MergeBlockRatioProperty: 2513 if self._match(TokenType.EQ): 2514 return self.expression( 2515 exp.MergeBlockRatioProperty, 2516 this=self._parse_number(), 2517 percent=self._match(TokenType.PERCENT), 
2518 ) 2519 2520 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2521 2522 def _parse_datablocksize( 2523 self, 2524 default: t.Optional[bool] = None, 2525 minimum: t.Optional[bool] = None, 2526 maximum: t.Optional[bool] = None, 2527 ) -> exp.DataBlocksizeProperty: 2528 self._match(TokenType.EQ) 2529 size = self._parse_number() 2530 2531 units = None 2532 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2533 units = self._prev.text 2534 2535 return self.expression( 2536 exp.DataBlocksizeProperty, 2537 size=size, 2538 units=units, 2539 default=default, 2540 minimum=minimum, 2541 maximum=maximum, 2542 ) 2543 2544 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2545 self._match(TokenType.EQ) 2546 always = self._match_text_seq("ALWAYS") 2547 manual = self._match_text_seq("MANUAL") 2548 never = self._match_text_seq("NEVER") 2549 default = self._match_text_seq("DEFAULT") 2550 2551 autotemp = None 2552 if self._match_text_seq("AUTOTEMP"): 2553 autotemp = self._parse_schema() 2554 2555 return self.expression( 2556 exp.BlockCompressionProperty, 2557 always=always, 2558 manual=manual, 2559 never=never, 2560 default=default, 2561 autotemp=autotemp, 2562 ) 2563 2564 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2565 index = self._index 2566 no = self._match_text_seq("NO") 2567 concurrent = self._match_text_seq("CONCURRENT") 2568 2569 if not self._match_text_seq("ISOLATED", "LOADING"): 2570 self._retreat(index) 2571 return None 2572 2573 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2574 return self.expression( 2575 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2576 ) 2577 2578 def _parse_locking(self) -> exp.LockingProperty: 2579 if self._match(TokenType.TABLE): 2580 kind = "TABLE" 2581 elif self._match(TokenType.VIEW): 2582 kind = "VIEW" 2583 elif self._match(TokenType.ROW): 2584 kind = "ROW" 2585 elif 
self._match_text_seq("DATABASE"): 2586 kind = "DATABASE" 2587 else: 2588 kind = None 2589 2590 if kind in ("DATABASE", "TABLE", "VIEW"): 2591 this = self._parse_table_parts() 2592 else: 2593 this = None 2594 2595 if self._match(TokenType.FOR): 2596 for_or_in = "FOR" 2597 elif self._match(TokenType.IN): 2598 for_or_in = "IN" 2599 else: 2600 for_or_in = None 2601 2602 if self._match_text_seq("ACCESS"): 2603 lock_type = "ACCESS" 2604 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2605 lock_type = "EXCLUSIVE" 2606 elif self._match_text_seq("SHARE"): 2607 lock_type = "SHARE" 2608 elif self._match_text_seq("READ"): 2609 lock_type = "READ" 2610 elif self._match_text_seq("WRITE"): 2611 lock_type = "WRITE" 2612 elif self._match_text_seq("CHECKSUM"): 2613 lock_type = "CHECKSUM" 2614 else: 2615 lock_type = None 2616 2617 override = self._match_text_seq("OVERRIDE") 2618 2619 return self.expression( 2620 exp.LockingProperty, 2621 this=this, 2622 kind=kind, 2623 for_or_in=for_or_in, 2624 lock_type=lock_type, 2625 override=override, 2626 ) 2627 2628 def _parse_partition_by(self) -> t.List[exp.Expression]: 2629 if self._match(TokenType.PARTITION_BY): 2630 return self._parse_csv(self._parse_assignment) 2631 return [] 2632 2633 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2634 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2635 if self._match_text_seq("MINVALUE"): 2636 return exp.var("MINVALUE") 2637 if self._match_text_seq("MAXVALUE"): 2638 return exp.var("MAXVALUE") 2639 return self._parse_bitwise() 2640 2641 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2642 expression = None 2643 from_expressions = None 2644 to_expressions = None 2645 2646 if self._match(TokenType.IN): 2647 this = self._parse_wrapped_csv(self._parse_bitwise) 2648 elif self._match(TokenType.FROM): 2649 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2650 self._match_text_seq("TO") 2651 to_expressions = 
self._parse_wrapped_csv(_parse_partition_bound_expr) 2652 elif self._match_text_seq("WITH", "(", "MODULUS"): 2653 this = self._parse_number() 2654 self._match_text_seq(",", "REMAINDER") 2655 expression = self._parse_number() 2656 self._match_r_paren() 2657 else: 2658 self.raise_error("Failed to parse partition bound spec.") 2659 2660 return self.expression( 2661 exp.PartitionBoundSpec, 2662 this=this, 2663 expression=expression, 2664 from_expressions=from_expressions, 2665 to_expressions=to_expressions, 2666 ) 2667 2668 # https://www.postgresql.org/docs/current/sql-createtable.html 2669 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2670 if not self._match_text_seq("OF"): 2671 self._retreat(self._index - 1) 2672 return None 2673 2674 this = self._parse_table(schema=True) 2675 2676 if self._match(TokenType.DEFAULT): 2677 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2678 elif self._match_text_seq("FOR", "VALUES"): 2679 expression = self._parse_partition_bound_spec() 2680 else: 2681 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2682 2683 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2684 2685 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2686 self._match(TokenType.EQ) 2687 return self.expression( 2688 exp.PartitionedByProperty, 2689 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2690 ) 2691 2692 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2693 if self._match_text_seq("AND", "STATISTICS"): 2694 statistics = True 2695 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2696 statistics = False 2697 else: 2698 statistics = None 2699 2700 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2701 2702 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2703 if self._match_text_seq("SQL"): 2704 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS 
SQL") 2705 return None 2706 2707 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2708 if self._match_text_seq("SQL", "DATA"): 2709 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2710 return None 2711 2712 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2713 if self._match_text_seq("PRIMARY", "INDEX"): 2714 return exp.NoPrimaryIndexProperty() 2715 if self._match_text_seq("SQL"): 2716 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2717 return None 2718 2719 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2720 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2721 return exp.OnCommitProperty() 2722 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2723 return exp.OnCommitProperty(delete=True) 2724 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2725 2726 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2727 if self._match_text_seq("SQL", "DATA"): 2728 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2729 return None 2730 2731 def _parse_distkey(self) -> exp.DistKeyProperty: 2732 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2733 2734 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2735 table = self._parse_table(schema=True) 2736 2737 options = [] 2738 while self._match_texts(("INCLUDING", "EXCLUDING")): 2739 this = self._prev.text.upper() 2740 2741 id_var = self._parse_id_var() 2742 if not id_var: 2743 return None 2744 2745 options.append( 2746 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2747 ) 2748 2749 return self.expression(exp.LikeProperty, this=table, expressions=options) 2750 2751 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2752 return self.expression( 2753 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2754 ) 2755 2756 def 
    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause of a function/procedure definition.

        Handles three shapes: RETURNS TABLE<...> (BigQuery-style angle brackets),
        RETURNS TABLE (cols) (schema form), and RETURNS <type>; additionally the
        RETURNS NULL ON NULL INPUT form, which sets `null` with no type value.
        """
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> — struct-typed table signature.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # TABLE (col type, ...) — plain schema form.
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
kind=kind, 2814 expressions=expressions, 2815 partition=partition, 2816 format=format, 2817 ) 2818 2819 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2820 kind = self._prev.text.upper() 2821 expressions = [] 2822 2823 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2824 if self._match(TokenType.WHEN): 2825 expression = self._parse_disjunction() 2826 self._match(TokenType.THEN) 2827 else: 2828 expression = None 2829 2830 else_ = self._match(TokenType.ELSE) 2831 2832 if not self._match(TokenType.INTO): 2833 return None 2834 2835 return self.expression( 2836 exp.ConditionalInsert, 2837 this=self.expression( 2838 exp.Insert, 2839 this=self._parse_table(schema=True), 2840 expression=self._parse_derived_table_values(), 2841 ), 2842 expression=expression, 2843 else_=else_, 2844 ) 2845 2846 expression = parse_conditional_insert() 2847 while expression is not None: 2848 expressions.append(expression) 2849 expression = parse_conditional_insert() 2850 2851 return self.expression( 2852 exp.MultitableInserts, 2853 kind=kind, 2854 comments=comments, 2855 expressions=expressions, 2856 source=self._parse_table(), 2857 ) 2858 2859 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2860 comments = [] 2861 hint = self._parse_hint() 2862 overwrite = self._match(TokenType.OVERWRITE) 2863 ignore = self._match(TokenType.IGNORE) 2864 local = self._match_text_seq("LOCAL") 2865 alternative = None 2866 is_function = None 2867 2868 if self._match_text_seq("DIRECTORY"): 2869 this: t.Optional[exp.Expression] = self.expression( 2870 exp.Directory, 2871 this=self._parse_var_or_string(), 2872 local=local, 2873 row_format=self._parse_row_format(match_row=True), 2874 ) 2875 else: 2876 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2877 comments += ensure_list(self._prev_comments) 2878 return self._parse_multitable_inserts(comments) 2879 2880 if self._match(TokenType.OR): 2881 alternative = 
self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2882 2883 self._match(TokenType.INTO) 2884 comments += ensure_list(self._prev_comments) 2885 self._match(TokenType.TABLE) 2886 is_function = self._match(TokenType.FUNCTION) 2887 2888 this = ( 2889 self._parse_table(schema=True, parse_partition=True) 2890 if not is_function 2891 else self._parse_function() 2892 ) 2893 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2894 this.set("alias", self._parse_table_alias()) 2895 2896 returning = self._parse_returning() 2897 2898 return self.expression( 2899 exp.Insert, 2900 comments=comments, 2901 hint=hint, 2902 is_function=is_function, 2903 this=this, 2904 stored=self._match_text_seq("STORED") and self._parse_stored(), 2905 by_name=self._match_text_seq("BY", "NAME"), 2906 exists=self._parse_exists(), 2907 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2908 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2909 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2910 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2911 conflict=self._parse_on_conflict(), 2912 returning=returning or self._parse_returning(), 2913 overwrite=overwrite, 2914 alternative=alternative, 2915 ignore=ignore, 2916 source=self._match(TokenType.TABLE) and self._parse_table(), 2917 ) 2918 2919 def _parse_kill(self) -> exp.Kill: 2920 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2921 2922 return self.expression( 2923 exp.Kill, 2924 this=self._parse_primary(), 2925 kind=kind, 2926 ) 2927 2928 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2929 conflict = self._match_text_seq("ON", "CONFLICT") 2930 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2931 2932 if not conflict and not duplicate: 2933 return None 2934 2935 conflict_keys = None 2936 constraint = None 2937 2938 if 
conflict:
            # ON CONFLICT targets: either ON CONSTRAINT <name> or (key, ...)
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] a = b, ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... after the ROW token has already been consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...); retreats and returns None when
        the SERDEPROPERTIES keyword is not actually present."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive-style ROW FORMAT SERDE '...' or ROW FORMAT DELIMITED with
        its optional FIELDS/ESCAPED/COLLECTION ITEMS/MAP KEYS/LINES/NULL clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each clause is optional; order here matches Hive's grammar.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; anything that is
        not LOAD DATA falls back to an opaque Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            # RETURNING may precede or follow WHERE depending on dialect
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] ...."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        """Parse USE [<kind>] <target> (e.g. USE DATABASE x)."""
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single key/value pair — stored as a flat [k, v] list
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION/SUBPARTITION (<assignments>); None when absent."""
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        """Parse one row of a VALUES clause as a Tuple — either a parenthesized
        expression list or, in permissive dialects, a single bare expression."""

        def _parse_value_expression() -> t.Optional[exp.Expression]:
            # DEFAULT as a row value, where the dialect supports it
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook)."""
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        """Parse the contents of a parenthesized query: a simplified
        PIVOT/UNPIVOT, a FROM-first query, or a nested select/table."""
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                # Bare FROM x — treat as SELECT * FROM x
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query, then optionally consume trailing
        pipe-syntax (|>) operators and wrap into a subquery when table=True."""
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Core SELECT parser: handles leading CTEs, FROM-first queries,
        wrapped selects, VALUES, SUMMARIZE, DESCRIBE and STREAM forms."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # Guard against e.g. `SELECT all.x` where ALL is a table name
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct =
self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # e.g. BigQuery's SELECT AS STRUCT / AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # FROM-first query without a SELECT keyword
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM was an identifier, not a keyword — back up
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        """Parse SEARCH { DEPTH | BREADTH } FIRST BY ... SET ... [USING ...]
        after a recursive WITH; None when no search clause is present."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs; tolerates a
        repeated WITH keyword between CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse one CTE: `alias [NOT MATERIALIZED | MATERIALIZED] AS (stmt)`.
        Retreats and returns None when AS is required but missing."""
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            # Normalize a bare VALUES CTE body into SELECT * FROM (VALUES ...)
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap an already-parsed query in a Subquery, consuming any trailing
        pivots, alias and table sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to earlier table aliases (e.g.
        BigQuery's `FROM t, t.arr`) into explicit UNNEST(...) joins."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, where/group/order/limit
        etc., via QUERY_MODIFIER_PARSERS) to a modifiable query node."""
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an embedded OFFSET (and LIMIT BY
                            # expressions) that belong on a separate Offset node
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume all remaining tokens and keep them as one raw hint string."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        """Hook for dialects that parse hint function calls differently."""
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse hint contents as function calls / vars; on any parse error or
        leftover tokens, fall back to storing the hint as a raw string."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint that the tokenizer surfaced as a comment on
        a HINT token."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; None when the FROM keyword is absent (unless
        skip_from_token says it was already consumed)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry: [FINAL | RUNNING] <expr>."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause (partitioning, ordering,
        measures, row semantics, AFTER MATCH SKIP, PATTERN and DEFINE)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like sub-language — capture its raw SQL
            # between balanced parens rather than parsing it.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY, including Hive's
        LATERAL VIEW form; None when none of these keywords is present."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # cross_apply tri-state: True = CROSS APPLY, False = OUTER APPLY,
            # None = plain LATERAL
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the (method, side, kind) tokens of a join, each
        None when not present."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the USING (...) identifier list, unwrapping bare columns to
        their identifiers."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN clause (including comma joins and CROSS/OUTER APPLY);
        None when the upcoming tokens do not form a join."""
        if self._match(TokenType.COMMA):
            # Comma join; _try_parse so a trailing comma isn't a hard error
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # The ON/USING may belong to this join but follow nested joins —
            # speculatively parse them, then look for ON/USING again.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
kwargs["on"] = self._parse_assignment() 3808 elif joins and self._match(TokenType.USING): 3809 kwargs["using"] = self._parse_using_identifiers() 3810 else: 3811 joins = None 3812 self._retreat(index) 3813 3814 kwargs["this"].set("joins", joins if joins else None) 3815 3816 kwargs["pivots"] = self._parse_pivots() 3817 3818 comments = [c for token in (method, side, kind) if token for c in token.comments] 3819 return self.expression(exp.Join, comments=comments, **kwargs) 3820 3821 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3822 this = self._parse_assignment() 3823 3824 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3825 return this 3826 3827 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3828 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3829 3830 return this 3831 3832 def _parse_index_params(self) -> exp.IndexParameters: 3833 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3834 3835 if self._match(TokenType.L_PAREN, advance=False): 3836 columns = self._parse_wrapped_csv(self._parse_with_operator) 3837 else: 3838 columns = None 3839 3840 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3841 partition_by = self._parse_partition_by() 3842 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3843 tablespace = ( 3844 self._parse_var(any_token=True) 3845 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3846 else None 3847 ) 3848 where = self._parse_where() 3849 3850 on = self._parse_field() if self._match(TokenType.ON) else None 3851 3852 return self.expression( 3853 exp.IndexParameters, 3854 using=using, 3855 columns=columns, 3856 include=include, 3857 partition_by=partition_by, 3858 where=where, 3859 with_storage=with_storage, 3860 tablespace=tablespace, 3861 on=on, 3862 ) 3863 3864 def _parse_index( 3865 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 
) -> t.Optional[exp.Index]:
        if index or anonymous:
            # The index name was already consumed (or is absent): only the
            # target table remains to be parsed.
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints, if present."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function, identifier,
        string-as-identifier, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dotted) table reference into an exp.Table.

        Supports catalog.db.table chains (deeper chains become nested Dot
        expressions), the tsql `a..b` empty-part form, a trailing `*` wildcard
        suffix, and attaches CHANGES / AT|BEFORE / PIVOT clauses when present.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # For a database reference the last parsed part is the db, not a table.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] =
None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Try the specialized table-like constructs first; each one fully
        # consumes its own syntax and short-circuits the generic path below.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, others after.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal (time-travel) clause such as FOR TIMESTAMP/VERSION
        AS OF, BETWEEN ... AND, FROM ... TO, CONTAINED IN (...) or ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            # AS OF is optional in some dialects, so the keywords may be absent.
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if
self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
        else:
            # Not a historical-data clause: rewind to where we started.
            self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse a Snowflake CHANGES (INFORMATION => ...) clause, if present."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression, including its alias, column
        aliases and the WITH ORDINALITY / WITH OFFSET variants."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In column-only dialects the single alias names the column,
                # not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The extra column alias names the ordinality/offset column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES (...) list, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE clause (or DuckDB-style USING SAMPLE when
        as_modifier is True), covering BUCKET, PERCENT, ROWS and SEED forms."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            # NOTE(review): duplicated assignment below — the second binding is
            # redundant; behavior is unchanged but it looks like a typo.
            bucket_denominator = bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()
if self._match(TokenType.L_PAREN):
            # Sampling method and optional seed wrapped in parentheses.
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Collect consecutive PIVOT/UNPIVOT clauses until one fails to parse."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        """Yield joins until `_parse_join` returns None."""
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        """Parse the INTO NAME ... VALUE ... clause of an UNPIVOT."""
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT/UNPIVOT statement form."""

        def _parse_on() -> t.Optional[exp.Expression]:
            # One element of the ON list: a column, optionally with IN (...) or AS.
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `<column> IN (<values>)` portion of a PIVOT clause."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                # A bare single-part column used as an alias is unwrapped to
                # its identifier.
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a full PIVOT/UNPIVOT clause, including the aggregation list,
        FOR ... IN fields, and the inferred output column names."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions =
[]

        if not self._match(TokenType.L_PAREN):
            # Not actually a PIVOT body: rewind past the keyword we consumed.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        # Each FOR <col> IN (...) group becomes one "field".
        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the aliases of the aggregations that carry one."""
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse PREWHERE clause, if present."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, if present."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL/DISTINCT, CUBE, ROLLUP,
        GROUPING SETS and WITH TOTALS variants."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)
if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # A bare WITH (with no ROLLUP/CUBE/etc. following) does not belong
            # to the GROUP BY: give it back and stop.
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # No progress made in this iteration: we're done.
            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        """Build a Cube/Rollup node; WITH-prefixed forms take no column list."""
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, if present."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, if present."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        """Parse a CONNECT BY condition with PRIOR temporarily registered as a
        no-paren function parser, restoring the registry afterwards."""
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        # START WITH may also follow CONNECT BY.
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        """Parse `<identifier> [AS <expression>]`."""
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a ClickHouse INTERPOLATE (...) list, if present."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged when
        neither keyword is present."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) into exp_class."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # `(asc and False)` can only yield False (never True), so its effect is
        # to normalize `desc` to False when ASC was explicitly matched.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the query doesn't say where NULLs go, apply the dialect's
        # default NULL ordering.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        """Parse the PERCENT / ROW(S) [ONLY] / WITH TIES options of a limit."""
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP (and FETCH) clauses; returns `this` unchanged when
        no limiting clause is present."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            # MySQL-style `LIMIT offset, count`.
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        """Speculatively check whether a LIMIT/OFFSET clause follows, without
        consuming any tokens."""
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE, with OF, NOWAIT, WAIT and SKIP LOCKED options."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait semantics: True = NOWAIT, False = SKIP LOCKED,
            # expression = WAIT <n>, None = unspecified.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse one UNION/EXCEPT/INTERSECT operator and its right-hand side,
        attaching `this` as the left operand; returns None when no set
        operation follows (the cursor is restored in that case)."""
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            # Fall back to the dialect's per-operation default.
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True

            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold consecutive set operations left-associatively onto `this`."""
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY/LIMIT) from the right
                # operand onto the set operation itself.
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse an assignment expression with an optional alias."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse right-associative assignment operators (e.g. `:=`)."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
expression=self._parse_assignment(), 4861 ) 4862 4863 return this 4864 4865 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4866 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4867 4868 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4869 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4870 4871 def _parse_equality(self) -> t.Optional[exp.Expression]: 4872 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4873 4874 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4875 return self._parse_tokens(self._parse_range, self.COMPARISON) 4876 4877 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4878 this = this or self._parse_bitwise() 4879 negate = self._match(TokenType.NOT) 4880 4881 if self._match_set(self.RANGE_PARSERS): 4882 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4883 if not expression: 4884 return this 4885 4886 this = expression 4887 elif self._match(TokenType.ISNULL): 4888 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4889 4890 # Postgres supports ISNULL and NOTNULL for conditions. 
4891 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4892 if self._match(TokenType.NOTNULL): 4893 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4894 this = self.expression(exp.Not, this=this) 4895 4896 if negate: 4897 this = self._negate_range(this) 4898 4899 if self._match(TokenType.IS): 4900 this = self._parse_is(this) 4901 4902 return this 4903 4904 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4905 if not this: 4906 return this 4907 4908 return self.expression(exp.Not, this=this) 4909 4910 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4911 index = self._index - 1 4912 negate = self._match(TokenType.NOT) 4913 4914 if self._match_text_seq("DISTINCT", "FROM"): 4915 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4916 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4917 4918 if self._match(TokenType.JSON): 4919 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4920 4921 if self._match_text_seq("WITH"): 4922 _with = True 4923 elif self._match_text_seq("WITHOUT"): 4924 _with = False 4925 else: 4926 _with = None 4927 4928 unique = self._match(TokenType.UNIQUE) 4929 self._match_text_seq("KEYS") 4930 expression: t.Optional[exp.Expression] = self.expression( 4931 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4932 ) 4933 else: 4934 expression = self._parse_primary() or self._parse_null() 4935 if not expression: 4936 self._retreat(index) 4937 return None 4938 4939 this = self.expression(exp.Is, this=this, expression=expression) 4940 return self.expression(exp.Not, this=this) if negate else this 4941 4942 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4943 unnest = self._parse_unnest(with_alias=False) 4944 if unnest: 4945 this = self.expression(exp.In, this=this, unnest=unnest) 4946 elif self._match_set((TokenType.L_PAREN, 
                              TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # IN (SELECT ...) — store the query as a subquery, not an expression list
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `<low> AND <high>` following a BETWEEN keyword."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' clause (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression; when match_interval is False the INTERVAL keyword
        is assumed to have been consumed already (used for interval-sum continuations)."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare unquoted IS after INTERVAL means this wasn't an interval at all
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
5001 # each INTERVAL expression into this canonical form so it's easy to transpile 5002 if this and this.is_number: 5003 this = exp.Literal.string(this.to_py()) 5004 elif this and this.is_string: 5005 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5006 if parts and unit: 5007 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5008 unit = None 5009 self._retreat(self._index - 1) 5010 5011 if len(parts) == 1: 5012 this = exp.Literal.string(parts[0][0]) 5013 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5014 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5015 unit = self.expression( 5016 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5017 ) 5018 5019 interval = self.expression(exp.Interval, this=this, unit=unit) 5020 5021 index = self._index 5022 self._match(TokenType.PLUS) 5023 5024 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5025 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5026 return self.expression( 5027 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5028 ) 5029 5030 self._retreat(index) 5031 return interval 5032 5033 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5034 this = self._parse_term() 5035 5036 while True: 5037 if self._match_set(self.BITWISE): 5038 this = self.expression( 5039 self.BITWISE[self._prev.token_type], 5040 this=this, 5041 expression=self._parse_term(), 5042 ) 5043 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5044 this = self.expression( 5045 exp.DPipe, 5046 this=this, 5047 expression=self._parse_term(), 5048 safe=not self.dialect.STRICT_STRING_CONCAT, 5049 ) 5050 elif self._match(TokenType.DQMARK): 5051 this = self.expression( 5052 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5053 ) 5054 elif self._match_pair(TokenType.LT, TokenType.LT): 5055 this = self.expression( 5056 exp.BitwiseLeftShift, 
                    this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (self.TERM), with a COLLATE special case."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
            # fallback to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (self.FACTOR), tagging Div semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word-like DIV token with no right operand wasn't an operator — back off
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            # Record the dialect's division semantics so transpilation can preserve them
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation operators (self.EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, otherwise a typed expression with AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal (e.g. DATE '2020-01-01'), or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    # The literal carries a timezone, so the constructor yields TIMESTAMPTZ
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
5161 # 5162 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5163 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5164 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5165 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5166 # 5167 # In these cases, we don't really want to return the converted type, but instead retreat 5168 # and try to parse a Column or Identifier in the section below. 5169 if data_type.expressions and index2 - index > 1: 5170 self._retreat(index2) 5171 return self._parse_column_ops(data_type) 5172 5173 self._retreat(index) 5174 5175 if fallback_to_identifier: 5176 return self._parse_id_var() 5177 5178 this = self._parse_column() 5179 return this and self._parse_column_ops(this) 5180 5181 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5182 this = self._parse_type() 5183 if not this: 5184 return None 5185 5186 if isinstance(this, exp.Column) and not this.table: 5187 this = exp.var(this.name.upper()) 5188 5189 return self.expression( 5190 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5191 ) 5192 5193 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5194 type_name = identifier.name 5195 5196 while self._match(TokenType.DOT): 5197 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5198 5199 return exp.DataType.build(type_name, udt=True) 5200 5201 def _parse_types( 5202 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5203 ) -> t.Optional[exp.Expression]: 5204 index = self._index 5205 5206 this: t.Optional[exp.Expression] = None 5207 prefix = self._match_text_seq("SYSUDTLIB", ".") 5208 5209 if not self._match_set(self.TYPE_TOKENS): 5210 identifier = allow_identifiers and self._parse_id_var( 5211 any_token=False, tokens=(TokenType.VAR,) 5212 ) 5213 if isinstance(identifier, 
                          exp.Identifier):
                # Re-tokenize the identifier: it may actually be a type keyword in this dialect
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            # MAP[<key_type> => <value_type>]
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema,
                        allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # ClickHouse-style Nullable(T) — unwrap to T with a nullable flag
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    # VECTOR(<element_type>, <dim>) — the first param is itself a type
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket type args, e.g. ARRAY<INT> or STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Inline constructor values, e.g. ARRAY<INT>[1, 2]
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                # WITHOUT TIME ZONE is the default; just consume the clause
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows, so <name>(...) was a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g.
                # in DuckDB ARRAY[1] should retreat and instead be parsed into exp.Array,
                # in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            # Give the dialect a chance to canonicalize the parsed type
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one `name [:] type` member of a STRUCT type definition."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # What we parsed wasn't a type after all — start over parsing it as one
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional trailing AT TIME ZONE <expr> clause."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference along with its trailing operators (dots, casts, brackets)."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference, treating VALUES as an identifier where allowed."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path.to.field` VARIANT extraction into JSONExtract."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g.
col:"a'b") as 5533 # it'll roundtrip to a string literal in GET_PATH 5534 if isinstance(path, exp.Identifier) and path.quoted: 5535 escape = True 5536 5537 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5538 5539 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5540 # Databricks transforms it back to the colon/dot notation 5541 if json_path: 5542 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5543 5544 if json_path_expr: 5545 json_path_expr.set("escape", escape) 5546 5547 this = self.expression( 5548 exp.JSONExtract, 5549 this=this, 5550 expression=json_path_expr, 5551 variant_extract=True, 5552 ) 5553 5554 while casts: 5555 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5556 5557 return this 5558 5559 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5560 return self._parse_types() 5561 5562 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5563 this = self._parse_bracket(this) 5564 5565 while self._match_set(self.COLUMN_OPERATORS): 5566 op_token = self._prev.token_type 5567 op = self.COLUMN_OPERATORS.get(op_token) 5568 5569 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5570 field = self._parse_dcolon() 5571 if not field: 5572 self.raise_error("Expected type") 5573 elif op and self._curr: 5574 field = self._parse_column_reference() or self._parse_bracket() 5575 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5576 field = self._parse_column_ops(field) 5577 else: 5578 field = self._parse_field(any_token=True, anonymous_func=True) 5579 5580 # Function calls can be qualified, e.g., x.y.FOO() 5581 # This converts the final AST to a series of Dots leading to the function call 5582 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5583 if isinstance(field, (exp.Func, exp.Window)) and 
this: 5584 this = this.transform( 5585 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5586 ) 5587 5588 if op: 5589 this = op(self, this, field) 5590 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5591 this = self.expression( 5592 exp.Column, 5593 comments=this.comments, 5594 this=field, 5595 table=this.this, 5596 db=this.args.get("table"), 5597 catalog=this.args.get("db"), 5598 ) 5599 elif isinstance(field, exp.Window): 5600 # Move the exp.Dot's to the window's function 5601 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5602 field.set("this", window_func) 5603 this = field 5604 else: 5605 this = self.expression(exp.Dot, this=this, expression=field) 5606 5607 if field and field.comments: 5608 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5609 5610 this = self._parse_bracket(this) 5611 5612 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5613 5614 def _parse_paren(self) -> t.Optional[exp.Expression]: 5615 if not self._match(TokenType.L_PAREN): 5616 return None 5617 5618 comments = self._prev_comments 5619 query = self._parse_select() 5620 5621 if query: 5622 expressions = [query] 5623 else: 5624 expressions = self._parse_expressions() 5625 5626 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5627 5628 if not this and self._match(TokenType.R_PAREN, advance=False): 5629 this = self.expression(exp.Tuple) 5630 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5631 this = self._parse_subquery(this=this, parse_alias=False) 5632 elif isinstance(this, exp.Subquery): 5633 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5634 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5635 this = self.expression(exp.Tuple, expressions=expressions) 5636 else: 5637 this = self.expression(exp.Paren, this=this) 5638 5639 if this: 5640 this.add_comments(comments) 5641 5642 
        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, adjacent-string concat, `.N` number,
        or a parenthesized expression."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals ('a' 'b') concatenate
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 parses as 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier.

        When anonymous_func is set, the function parse is attempted first so that
        reserved-word function names win over primaries.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, also supporting the ODBC {fn <function>} escape wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation.

        Resolution order: dialect no-paren parsers, no-paren builtins, dialect
        FUNCTION_PARSERS, subquery predicates, then known builders from `functions`
        (defaulting to self.FUNCTIONS); anything else becomes exp.Anonymous.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                # e.g. CURRENT_DATE — a builtin that takes no parentheses
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS (SELECT ...)
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Only pass the dialect to builders whose signatures accept it
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to transform positional args into PropertyEQ; identity by default."""
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style function arguments (aliases, EQs) into exp.PropertyEQ."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # The key is an identifier, not a column reference
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)
5830 transformed.append(e) 5831 5832 return transformed 5833 5834 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5835 return self._parse_statement() 5836 5837 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5838 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5839 5840 def _parse_user_defined_function( 5841 self, kind: t.Optional[TokenType] = None 5842 ) -> t.Optional[exp.Expression]: 5843 this = self._parse_table_parts(schema=True) 5844 5845 if not self._match(TokenType.L_PAREN): 5846 return this 5847 5848 expressions = self._parse_csv(self._parse_function_parameter) 5849 self._match_r_paren() 5850 return self.expression( 5851 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5852 ) 5853 5854 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5855 literal = self._parse_primary() 5856 if literal: 5857 return self.expression(exp.Introducer, this=token.text, expression=literal) 5858 5859 return self._identifier_expression(token) 5860 5861 def _parse_session_parameter(self) -> exp.SessionParameter: 5862 kind = None 5863 this = self._parse_id_var() or self._parse_primary() 5864 5865 if this and self._match(TokenType.DOT): 5866 kind = this.name 5867 this = self._parse_var() or self._parse_primary() 5868 5869 return self.expression(exp.SessionParameter, this=this, kind=kind) 5870 5871 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5872 return self._parse_id_var() 5873 5874 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5875 index = self._index 5876 5877 if self._match(TokenType.L_PAREN): 5878 expressions = t.cast( 5879 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5880 ) 5881 5882 if not self._match(TokenType.R_PAREN): 5883 self._retreat(index) 5884 else: 5885 expressions = [self._parse_lambda_arg()] 5886 5887 if self._match_set(self.LAMBDAS): 5888 return 
self.LAMBDAS[self._prev.token_type](self, expressions) 5889 5890 self._retreat(index) 5891 5892 this: t.Optional[exp.Expression] 5893 5894 if self._match(TokenType.DISTINCT): 5895 this = self.expression( 5896 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5897 ) 5898 else: 5899 this = self._parse_select_or_expression(alias=alias) 5900 5901 return self._parse_limit( 5902 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5903 ) 5904 5905 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5906 index = self._index 5907 if not self._match(TokenType.L_PAREN): 5908 return this 5909 5910 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5911 # expr can be of both types 5912 if self._match_set(self.SELECT_START_TOKENS): 5913 self._retreat(index) 5914 return this 5915 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5916 self._match_r_paren() 5917 return self.expression(exp.Schema, this=this, expressions=args) 5918 5919 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5920 return self._parse_column_def(self._parse_field(any_token=True)) 5921 5922 def _parse_column_def( 5923 self, this: t.Optional[exp.Expression], computed_column: bool = True 5924 ) -> t.Optional[exp.Expression]: 5925 # column defs are not really columns, they're identifiers 5926 if isinstance(this, exp.Column): 5927 this = this.this 5928 5929 if not computed_column: 5930 self._match(TokenType.ALIAS) 5931 5932 kind = self._parse_types(schema=True) 5933 5934 if self._match_text_seq("FOR", "ORDINALITY"): 5935 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5936 5937 constraints: t.List[exp.Expression] = [] 5938 5939 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5940 ("ALIAS", "MATERIALIZED") 5941 ): 5942 persisted = self._prev.text.upper() == "MATERIALIZED" 5943 constraint_kind = 
exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        # Collect any remaining column constraints (NOT NULL, DEFAULT, ...)
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT arguments.

        Returns a GeneratedAsIdentity constraint when both a start and an
        increment were given, otherwise a plain AutoIncrement constraint.
        """
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <var>; retreats one token if REFRESH does not follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | ROW | <expr>} clauses."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — parenthesized generation expression
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <expr>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following a NOT token (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # Don't treat a procedure option (e.g. "WITH <option>") as a constraint
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named CONSTRAINT, or fall back to an unnamed schema constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Greedily parse consecutive unnamed constraints (or constraint-like functions)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint, restricted to `constraints` if given."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        """Parse the identifier naming a unique key."""
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with optional USING index type and options."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options as raw strings (e.g. "ON DELETE CASCADE")."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match=False` skips matching the keyword."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is always None here — a referenced column
        # list after the table is not parsed; confirm this is intended.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY [(cols)] REFERENCES ... with ON DELETE/UPDATE actions."""
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one primary-key column, allowing an ordered (ASC/DESC) form."""
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); retreats if SYSTEM_TIME is absent."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse a bracketed element: assignment with optional alias and slice."""
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` constructs: subscripts, arrays, structs, ODBC literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Plain subscript — normalize indices for the dialect's index base
            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        # Recurse to consume chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:<expr>` slice suffix."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # "END" may have been consumed as an INTERVAL unit of a column named
            # "interval" — undo that misparse instead of erroring out
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or the keyword form IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-initial bare IF is treated as a command
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(<part> FROM <expr>) or the comma-separated variant."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <table>, <args...>)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(expr AS type [FORMAT ...] [DEFAULT ... ON CONVERSION ERROR]).

        `strict` selects exp.Cast vs exp.TryCast; `safe` is forwarded to the node.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            # A FORMAT clause on a temporal target is canonicalized into
            # StrToDate / StrToTime instead of a plain Cast
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG variants into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE([XMLNAMESPACES(...),] 'xpath' [PASSING ...] [COLUMNS ...])."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        """Parse a comma-separated XMLNAMESPACES list (DEFAULT uri or uri AS alias)."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: also match when both sides are NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of trailing args means the last one is the default
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] <key> <sep> [VALUE] <value> pair for JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when a FORMAT JSON suffix follows."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        """Parse ON EMPTY / ON ERROR / ON NULL handling clauses, in dialect order."""
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

        self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) arguments and modifiers."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS(...) schema for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [ON ERROR/EMPTY] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and 
self._parse_string() 6772 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6773 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6774 schema = self._parse_json_schema() 6775 6776 return exp.JSONTable( 6777 this=this, 6778 schema=schema, 6779 path=path, 6780 error_handling=error_handling, 6781 empty_handling=empty_handling, 6782 ) 6783 6784 def _parse_match_against(self) -> exp.MatchAgainst: 6785 expressions = self._parse_csv(self._parse_column) 6786 6787 self._match_text_seq(")", "AGAINST", "(") 6788 6789 this = self._parse_string() 6790 6791 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6792 modifier = "IN NATURAL LANGUAGE MODE" 6793 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6794 modifier = f"{modifier} WITH QUERY EXPANSION" 6795 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6796 modifier = "IN BOOLEAN MODE" 6797 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6798 modifier = "WITH QUERY EXPANSION" 6799 else: 6800 modifier = None 6801 6802 return self.expression( 6803 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6804 ) 6805 6806 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6807 def _parse_open_json(self) -> exp.OpenJSON: 6808 this = self._parse_bitwise() 6809 path = self._match(TokenType.COMMA) and self._parse_string() 6810 6811 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6812 this = self._parse_field(any_token=True) 6813 kind = self._parse_types() 6814 path = self._parse_string() 6815 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6816 6817 return self.expression( 6818 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6819 ) 6820 6821 expressions = None 6822 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6823 self._match_l_paren() 6824 expressions = self._parse_csv(_parse_open_json_column_def) 6825 6826 return self.expression(exp.OpenJSON, 
this=this, path=path, expressions=expressions) 6827 6828 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6829 args = self._parse_csv(self._parse_bitwise) 6830 6831 if self._match(TokenType.IN): 6832 return self.expression( 6833 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6834 ) 6835 6836 if haystack_first: 6837 haystack = seq_get(args, 0) 6838 needle = seq_get(args, 1) 6839 else: 6840 haystack = seq_get(args, 1) 6841 needle = seq_get(args, 0) 6842 6843 return self.expression( 6844 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6845 ) 6846 6847 def _parse_predict(self) -> exp.Predict: 6848 self._match_text_seq("MODEL") 6849 this = self._parse_table() 6850 6851 self._match(TokenType.COMMA) 6852 self._match_text_seq("TABLE") 6853 6854 return self.expression( 6855 exp.Predict, 6856 this=this, 6857 expression=self._parse_table(), 6858 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6859 ) 6860 6861 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6862 args = self._parse_csv(self._parse_table) 6863 return exp.JoinHint(this=func_name.upper(), expressions=args) 6864 6865 def _parse_substring(self) -> exp.Substring: 6866 # Postgres supports the form: substring(string [from int] [for int]) 6867 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6868 6869 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6870 6871 if self._match(TokenType.FROM): 6872 args.append(self._parse_bitwise()) 6873 if self._match(TokenType.FOR): 6874 if len(args) == 1: 6875 args.append(exp.Literal.number(1)) 6876 args.append(self._parse_bitwise()) 6877 6878 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6879 6880 def _parse_trim(self) -> exp.Trim: 6881 # https://www.w3resource.com/sql/character-functions/trim.php 6882 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6883 6884 position = None 
        collation = None
        expression = None

        # Optional LEADING/TRAILING/BOTH-style position keyword (dialect TRIM_TYPES).
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM str) puts the pattern first, so swap the operands;
            # some dialects (TRIM_PATTERN_FIRST) do the same for the comma form.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...), ... — returns None when the keyword is absent.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Wraps `this` when an IGNORE NULLS / RESPECT NULLS suffix is present.
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING MAX <column> / HAVING MIN <column>; anything other than MIN is
        # treated as MAX (including the case where neither keyword is present).
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        # Keep a handle on the original function so its comments can be moved
        # onto the Window node built below.
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the nested IGNORE/RESPECT NULLS wrapper to the outside
                # of the aggregate so both input formats normalize the same way.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows, so there is no window at all.
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — a reference to a named window.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS/RANGE [BETWEEN] <start> [AND <end>] [EXCLUDE ...].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            # Another OVER-like keyword follows: recurse with the window we just
            # built as the new function operand.
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus an optional
        # side keyword (dialect WINDOW_SIDES, e.g. PRECEDING/FOLLOWING).
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        # explicit=True requires the AS keyword to be present.
        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, ...).
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # Identifier first; otherwise accept any token (any_token) or one of the
        # allowed keyword tokens and treat it as an identifier.
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # Converts a string token into a quoted identifier, preserving positions.
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consumes the current token unless it is reserved (or reserved tokens
        # are explicitly allowed); returns the consumed token or None.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Placeholder parser produced nothing: give the token back.
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        # Star modifiers like EXCEPT/REPLACE: keyword followed by either a
        # parenthesized CSV or a single bare expression.
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        # Parses a `sep`-delimited list; None results are skipped. Comments seen
        # at each separator are attached to the preceding item via _add_comments.
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold: repeatedly combine operands with the node type
        # mapped from each matched operator token.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Runs parse_method inside ( ... ); the parens are required unless
        # `optional` is set, and the closing paren is only consumed when the
        # opening one was.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens joined with spaces; modes themselves
        # are comma-separated.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        # AND [NO] CHAIN
        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): `chain` is only attached to COMMIT; a parsed AND [NO]
        # CHAIN on ROLLBACK is consumed but not stored on the Rollback node.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        # Only applies when the previously-consumed token was ADD.
        if not self._prev.text.upper() == "ADD":
            return None

        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            # Not a column definition after all — rewind so the caller can retry.
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            # One ADD item: a constraint, a column definition, or a partition.
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition, exists=exists, this=self._parse_field(any_token=True)
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            # ADD [COLUMNS] (<defs>) or a bare CSV of field definitions.
            schema = self._parse_schema()

            return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def)

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallback: [SET DATA] [TYPE] <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        # DROP [IF EXISTS] PARTITION ... — otherwise rewind and parse columns.
        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        # RENAME [TO] <table> — renames the table itself.
        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Fallback: [SERDE <name>] followed by optional wrapped properties.
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only produce an Alter node when all tokens were consumed;
            # otherwise fall back to an opaque Command below.
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        # Tentatively record the upcoming keyword as the kind; branches below
        # may refine it or reset it to None.
        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            # The matched SYNC/ASYNC token sits two positions back (before MODE).
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            # NOTE(review): _match_texts with a plain string does an `in`
            # containment test, so any token whose text is a substring of
            # "COLUMNS" would also match — presumably _match_text_seq("COLUMNS")
            # was intended; confirm against _match_texts' implementation.
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        # <prev keyword> COLUMNS, e.g. ESTIMATE/COMPUTE ... COLUMNS.
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    # WITH <n> BUCKETS
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
returning=self._parse_returning(), 7735 ) 7736 7737 def _parse_when_matched(self) -> exp.Whens: 7738 whens = [] 7739 7740 while self._match(TokenType.WHEN): 7741 matched = not self._match(TokenType.NOT) 7742 self._match_text_seq("MATCHED") 7743 source = ( 7744 False 7745 if self._match_text_seq("BY", "TARGET") 7746 else self._match_text_seq("BY", "SOURCE") 7747 ) 7748 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7749 7750 self._match(TokenType.THEN) 7751 7752 if self._match(TokenType.INSERT): 7753 this = self._parse_star() 7754 if this: 7755 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7756 else: 7757 then = self.expression( 7758 exp.Insert, 7759 this=exp.var("ROW") 7760 if self._match_text_seq("ROW") 7761 else self._parse_value(values=False), 7762 expression=self._match_text_seq("VALUES") and self._parse_value(), 7763 ) 7764 elif self._match(TokenType.UPDATE): 7765 expressions = self._parse_star() 7766 if expressions: 7767 then = self.expression(exp.Update, expressions=expressions) 7768 else: 7769 then = self.expression( 7770 exp.Update, 7771 expressions=self._match(TokenType.SET) 7772 and self._parse_csv(self._parse_equality), 7773 ) 7774 elif self._match(TokenType.DELETE): 7775 then = self.expression(exp.Var, this=self._prev.text) 7776 else: 7777 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7778 7779 whens.append( 7780 self.expression( 7781 exp.When, 7782 matched=matched, 7783 source=source, 7784 condition=condition, 7785 then=then, 7786 ) 7787 ) 7788 return self.expression(exp.Whens, expressions=whens) 7789 7790 def _parse_show(self) -> t.Optional[exp.Expression]: 7791 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7792 if parser: 7793 return parser(self) 7794 return self._parse_as_command(self._prev) 7795 7796 def _parse_set_item_assignment( 7797 self, kind: t.Optional[str] = None 7798 ) -> t.Optional[exp.Expression]: 7799 index = self._index 7800 7801 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7802 return self._parse_set_transaction(global_=kind == "GLOBAL") 7803 7804 left = self._parse_primary() or self._parse_column() 7805 assignment_delimiter = self._match_texts(("=", "TO")) 7806 7807 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7808 self._retreat(index) 7809 return None 7810 7811 right = self._parse_statement() or self._parse_id_var() 7812 if isinstance(right, (exp.Column, exp.Identifier)): 7813 right = exp.var(right.name) 7814 7815 this = self.expression(exp.EQ, this=left, expression=right) 7816 return self.expression(exp.SetItem, this=this, kind=kind) 7817 7818 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7819 self._match_text_seq("TRANSACTION") 7820 characteristics = self._parse_csv( 7821 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7822 ) 7823 return self.expression( 7824 exp.SetItem, 7825 expressions=characteristics, 7826 kind="TRANSACTION", 7827 **{"global": global_}, # type: ignore 7828 ) 7829 7830 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7831 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7832 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7833 7834 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7835 index = self._index 7836 set_ = self.expression( 7837 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7838 ) 7839 7840 if self._curr: 7841 self._retreat(index) 7842 return self._parse_as_command(self._prev) 7843 7844 return set_ 7845 7846 def _parse_var_from_options( 7847 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7848 ) -> t.Optional[exp.Var]: 7849 start = self._curr 7850 if not start: 7851 return None 7852 7853 option = start.text.upper() 7854 continuations = options.get(option) 7855 7856 index = self._index 7857 self._advance() 7858 for 
keywords in continuations or []: 7859 if isinstance(keywords, str): 7860 keywords = (keywords,) 7861 7862 if self._match_text_seq(*keywords): 7863 option = f"{option} {' '.join(keywords)}" 7864 break 7865 else: 7866 if continuations or continuations is None: 7867 if raise_unmatched: 7868 self.raise_error(f"Unknown option {option}") 7869 7870 self._retreat(index) 7871 return None 7872 7873 return exp.var(option) 7874 7875 def _parse_as_command(self, start: Token) -> exp.Command: 7876 while self._curr: 7877 self._advance() 7878 text = self._find_sql(start, self._prev) 7879 size = len(start.text) 7880 self._warn_unsupported() 7881 return exp.Command(this=text[:size], expression=text[size:]) 7882 7883 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7884 settings = [] 7885 7886 self._match_l_paren() 7887 kind = self._parse_id_var() 7888 7889 if self._match(TokenType.L_PAREN): 7890 while True: 7891 key = self._parse_id_var() 7892 value = self._parse_primary() 7893 if not key and value is None: 7894 break 7895 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7896 self._match(TokenType.R_PAREN) 7897 7898 self._match_r_paren() 7899 7900 return self.expression( 7901 exp.DictProperty, 7902 this=this, 7903 kind=kind.this if kind else None, 7904 settings=settings, 7905 ) 7906 7907 def _parse_dict_range(self, this: str) -> exp.DictRange: 7908 self._match_l_paren() 7909 has_min = self._match_text_seq("MIN") 7910 if has_min: 7911 min = self._parse_var() or self._parse_primary() 7912 self._match_text_seq("MAX") 7913 max = self._parse_var() or self._parse_primary() 7914 else: 7915 max = self._parse_var() or self._parse_primary() 7916 min = exp.Literal.number(0) 7917 self._match_r_paren() 7918 return self.expression(exp.DictRange, this=this, min=min, max=max) 7919 7920 def _parse_comprehension( 7921 self, this: t.Optional[exp.Expression] 7922 ) -> t.Optional[exp.Comprehension]: 7923 index = self._index 7924 expression = self._parse_column() 
7925 if not self._match(TokenType.IN): 7926 self._retreat(index - 1) 7927 return None 7928 iterator = self._parse_column() 7929 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7930 return self.expression( 7931 exp.Comprehension, 7932 this=this, 7933 expression=expression, 7934 iterator=iterator, 7935 condition=condition, 7936 ) 7937 7938 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7939 if self._match(TokenType.HEREDOC_STRING): 7940 return self.expression(exp.Heredoc, this=self._prev.text) 7941 7942 if not self._match_text_seq("$"): 7943 return None 7944 7945 tags = ["$"] 7946 tag_text = None 7947 7948 if self._is_connected(): 7949 self._advance() 7950 tags.append(self._prev.text.upper()) 7951 else: 7952 self.raise_error("No closing $ found") 7953 7954 if tags[-1] != "$": 7955 if self._is_connected() and self._match_text_seq("$"): 7956 tag_text = tags[-1] 7957 tags.append("$") 7958 else: 7959 self.raise_error("No closing $ found") 7960 7961 heredoc_start = self._curr 7962 7963 while self._curr: 7964 if self._match_text_seq(*tags, advance=False): 7965 this = self._find_sql(heredoc_start, self._prev) 7966 self._advance(len(tags)) 7967 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7968 7969 self._advance() 7970 7971 self.raise_error(f"No closing {''.join(tags)} found") 7972 return None 7973 7974 def _find_parser( 7975 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7976 ) -> t.Optional[t.Callable]: 7977 if not self._curr: 7978 return None 7979 7980 index = self._index 7981 this = [] 7982 while True: 7983 # The current token might be multiple words 7984 curr = self._curr.text.upper() 7985 key = curr.split(" ") 7986 this.append(curr) 7987 7988 self._advance() 7989 result, trie = in_trie(trie, key) 7990 if result == TrieResult.FAILED: 7991 break 7992 7993 if result == TrieResult.EXISTS: 7994 subparser = parsers[" ".join(this)] 7995 return subparser 7996 7997 self._retreat(index) 7998 return None 7999 8000 def 
_match(self, token_type, advance=True, expression=None): 8001 if not self._curr: 8002 return None 8003 8004 if self._curr.token_type == token_type: 8005 if advance: 8006 self._advance() 8007 self._add_comments(expression) 8008 return True 8009 8010 return None 8011 8012 def _match_set(self, types, advance=True): 8013 if not self._curr: 8014 return None 8015 8016 if self._curr.token_type in types: 8017 if advance: 8018 self._advance() 8019 return True 8020 8021 return None 8022 8023 def _match_pair(self, token_type_a, token_type_b, advance=True): 8024 if not self._curr or not self._next: 8025 return None 8026 8027 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8028 if advance: 8029 self._advance(2) 8030 return True 8031 8032 return None 8033 8034 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8035 if not self._match(TokenType.L_PAREN, expression=expression): 8036 self.raise_error("Expecting (") 8037 8038 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8039 if not self._match(TokenType.R_PAREN, expression=expression): 8040 self.raise_error("Expecting )") 8041 8042 def _match_texts(self, texts, advance=True): 8043 if ( 8044 self._curr 8045 and self._curr.token_type != TokenType.STRING 8046 and self._curr.text.upper() in texts 8047 ): 8048 if advance: 8049 self._advance() 8050 return True 8051 return None 8052 8053 def _match_text_seq(self, *texts, advance=True): 8054 index = self._index 8055 for text in texts: 8056 if ( 8057 self._curr 8058 and self._curr.token_type != TokenType.STRING 8059 and self._curr.text.upper() == text 8060 ): 8061 self._advance() 8062 else: 8063 self._retreat(index) 8064 return None 8065 8066 if not advance: 8067 self._retreat(index) 8068 8069 return True 8070 8071 def _replace_lambda( 8072 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8073 ) -> t.Optional[exp.Expression]: 8074 if not node: 8075 return node 8076 
8077 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8078 8079 for column in node.find_all(exp.Column): 8080 typ = lambda_types.get(column.parts[0].name) 8081 if typ is not None: 8082 dot_or_id = column.to_dot() if column.table else column.this 8083 8084 if typ: 8085 dot_or_id = self.expression( 8086 exp.Cast, 8087 this=dot_or_id, 8088 to=typ, 8089 ) 8090 8091 parent = column.parent 8092 8093 while isinstance(parent, exp.Dot): 8094 if not isinstance(parent.parent, exp.Dot): 8095 parent.replace(dot_or_id) 8096 break 8097 parent = parent.parent 8098 else: 8099 if column is node: 8100 node = dot_or_id 8101 else: 8102 column.replace(dot_or_id) 8103 return node 8104 8105 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8106 start = self._prev 8107 8108 # Not to be confused with TRUNCATE(number, decimals) function call 8109 if self._match(TokenType.L_PAREN): 8110 self._retreat(self._index - 2) 8111 return self._parse_function() 8112 8113 # Clickhouse supports TRUNCATE DATABASE as well 8114 is_database = self._match(TokenType.DATABASE) 8115 8116 self._match(TokenType.TABLE) 8117 8118 exists = self._parse_exists(not_=False) 8119 8120 expressions = self._parse_csv( 8121 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8122 ) 8123 8124 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8125 8126 if self._match_text_seq("RESTART", "IDENTITY"): 8127 identity = "RESTART" 8128 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8129 identity = "CONTINUE" 8130 else: 8131 identity = None 8132 8133 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8134 option = self._prev.text 8135 else: 8136 option = None 8137 8138 partition = self._parse_partition() 8139 8140 # Fallback case 8141 if self._curr: 8142 return self._parse_as_command(start) 8143 8144 return self.expression( 8145 exp.TruncateTable, 8146 expressions=expressions, 8147 is_database=is_database, 8148 
exists=exists, 8149 cluster=cluster, 8150 identity=identity, 8151 option=option, 8152 partition=partition, 8153 ) 8154 8155 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8156 this = self._parse_ordered(self._parse_opclass) 8157 8158 if not self._match(TokenType.WITH): 8159 return this 8160 8161 op = self._parse_var(any_token=True) 8162 8163 return self.expression(exp.WithOperator, this=this, op=op) 8164 8165 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8166 self._match(TokenType.EQ) 8167 self._match(TokenType.L_PAREN) 8168 8169 opts: t.List[t.Optional[exp.Expression]] = [] 8170 option: exp.Expression | None 8171 while self._curr and not self._match(TokenType.R_PAREN): 8172 if self._match_text_seq("FORMAT_NAME", "="): 8173 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8174 option = self._parse_format_name() 8175 else: 8176 option = self._parse_property() 8177 8178 if option is None: 8179 self.raise_error("Unable to parse option") 8180 break 8181 8182 opts.append(option) 8183 8184 return opts 8185 8186 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8187 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8188 8189 options = [] 8190 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8191 option = self._parse_var(any_token=True) 8192 prev = self._prev.text.upper() 8193 8194 # Different dialects might separate options and values by white space, "=" and "AS" 8195 self._match(TokenType.EQ) 8196 self._match(TokenType.ALIAS) 8197 8198 param = self.expression(exp.CopyParameter, this=option) 8199 8200 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8201 TokenType.L_PAREN, advance=False 8202 ): 8203 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8204 param.set("expressions", self._parse_wrapped_options()) 8205 elif prev == "FILE_FORMAT": 8206 # T-SQL's external file format case 8207 param.set("expression", self._parse_field()) 8208 
else: 8209 param.set("expression", self._parse_unquoted_field()) 8210 8211 options.append(param) 8212 self._match(sep) 8213 8214 return options 8215 8216 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8217 expr = self.expression(exp.Credentials) 8218 8219 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8220 expr.set("storage", self._parse_field()) 8221 if self._match_text_seq("CREDENTIALS"): 8222 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8223 creds = ( 8224 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8225 ) 8226 expr.set("credentials", creds) 8227 if self._match_text_seq("ENCRYPTION"): 8228 expr.set("encryption", self._parse_wrapped_options()) 8229 if self._match_text_seq("IAM_ROLE"): 8230 expr.set("iam_role", self._parse_field()) 8231 if self._match_text_seq("REGION"): 8232 expr.set("region", self._parse_field()) 8233 8234 return expr 8235 8236 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8237 return self._parse_field() 8238 8239 def _parse_copy(self) -> exp.Copy | exp.Command: 8240 start = self._prev 8241 8242 self._match(TokenType.INTO) 8243 8244 this = ( 8245 self._parse_select(nested=True, parse_subquery_alias=False) 8246 if self._match(TokenType.L_PAREN, advance=False) 8247 else self._parse_table(schema=True) 8248 ) 8249 8250 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8251 8252 files = self._parse_csv(self._parse_file_location) 8253 credentials = self._parse_credentials() 8254 8255 self._match_text_seq("WITH") 8256 8257 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8258 8259 # Fallback case 8260 if self._curr: 8261 return self._parse_as_command(start) 8262 8263 return self.expression( 8264 exp.Copy, 8265 this=this, 8266 kind=kind, 8267 credentials=credentials, 8268 files=files, 8269 params=params, 8270 ) 8271 8272 def _parse_normalize(self) -> exp.Normalize: 8273 return self.expression( 8274 
exp.Normalize, 8275 this=self._parse_bitwise(), 8276 form=self._match(TokenType.COMMA) and self._parse_var(), 8277 ) 8278 8279 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8280 args = self._parse_csv(lambda: self._parse_lambda()) 8281 8282 this = seq_get(args, 0) 8283 decimals = seq_get(args, 1) 8284 8285 return expr_type( 8286 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8287 ) 8288 8289 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8290 star_token = self._prev 8291 8292 if self._match_text_seq("COLUMNS", "(", advance=False): 8293 this = self._parse_function() 8294 if isinstance(this, exp.Columns): 8295 this.set("unpack", True) 8296 return this 8297 8298 return self.expression( 8299 exp.Star, 8300 **{ # type: ignore 8301 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8302 "replace": self._parse_star_op("REPLACE"), 8303 "rename": self._parse_star_op("RENAME"), 8304 }, 8305 ).update_positions(star_token) 8306 8307 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8308 privilege_parts = [] 8309 8310 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8311 # (end of privilege list) or L_PAREN (start of column list) are met 8312 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8313 privilege_parts.append(self._curr.text.upper()) 8314 self._advance() 8315 8316 this = exp.var(" ".join(privilege_parts)) 8317 expressions = ( 8318 self._parse_wrapped_csv(self._parse_column) 8319 if self._match(TokenType.L_PAREN, advance=False) 8320 else None 8321 ) 8322 8323 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8324 8325 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8326 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8327 principal = self._parse_id_var() 8328 8329 if not principal: 8330 return None 8331 8332 return 
self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8333 8334 def _parse_grant(self) -> exp.Grant | exp.Command: 8335 start = self._prev 8336 8337 privileges = self._parse_csv(self._parse_grant_privilege) 8338 8339 self._match(TokenType.ON) 8340 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8341 8342 # Attempt to parse the securable e.g. MySQL allows names 8343 # such as "foo.*", "*.*" which are not easily parseable yet 8344 securable = self._try_parse(self._parse_table_parts) 8345 8346 if not securable or not self._match_text_seq("TO"): 8347 return self._parse_as_command(start) 8348 8349 principals = self._parse_csv(self._parse_grant_principal) 8350 8351 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8352 8353 if self._curr: 8354 return self._parse_as_command(start) 8355 8356 return self.expression( 8357 exp.Grant, 8358 privileges=privileges, 8359 kind=kind, 8360 securable=securable, 8361 principals=principals, 8362 grant_option=grant_option, 8363 ) 8364 8365 def _parse_overlay(self) -> exp.Overlay: 8366 return self.expression( 8367 exp.Overlay, 8368 **{ # type: ignore 8369 "this": self._parse_bitwise(), 8370 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8371 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8372 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8373 }, 8374 ) 8375 8376 def _parse_format_name(self) -> exp.Property: 8377 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8378 # for FILE_FORMAT = <format_name> 8379 return self.expression( 8380 exp.Property, 8381 this=exp.var("FORMAT_NAME"), 8382 value=self._parse_string() or self._parse_table_parts(), 8383 ) 8384 8385 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8386 args: t.List[exp.Expression] = [] 8387 8388 if self._match(TokenType.DISTINCT): 8389 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 
8390 self._match(TokenType.COMMA) 8391 8392 args.extend(self._parse_csv(self._parse_assignment)) 8393 8394 return self.expression( 8395 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8396 ) 8397 8398 def _identifier_expression( 8399 self, token: t.Optional[Token] = None, **kwargs: t.Any 8400 ) -> exp.Identifier: 8401 token = token or self._prev 8402 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8403 expression.update_positions(token) 8404 return expression 8405 8406 def _build_pipe_cte( 8407 self, 8408 query: exp.Query, 8409 expressions: t.List[exp.Expression], 8410 alias_cte: t.Optional[exp.TableAlias] = None, 8411 ) -> exp.Select: 8412 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8413 if alias_cte: 8414 new_cte = alias_cte 8415 else: 8416 self._pipe_cte_counter += 1 8417 new_cte = f"__tmp{self._pipe_cte_counter}" 8418 8419 with_ = query.args.get("with") 8420 ctes = with_.pop() if with_ else None 8421 8422 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8423 if ctes: 8424 new_select.set("with", ctes) 8425 8426 return new_select.with_(new_cte, as_=query, copy=False) 8427 8428 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8429 select = self._parse_select(consume_pipe=False) 8430 if not select: 8431 return query 8432 8433 return self._build_pipe_cte( 8434 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8435 ) 8436 8437 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8438 limit = self._parse_limit() 8439 offset = self._parse_offset() 8440 if limit: 8441 curr_limit = query.args.get("limit", limit) 8442 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8443 query.limit(limit, copy=False) 8444 if offset: 8445 curr_offset = query.args.get("offset") 8446 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8447 query.offset(exp.Literal.number(curr_offset + 
offset.expression.to_py()), copy=False) 8448 8449 return query 8450 8451 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8452 this = self._parse_assignment() 8453 if self._match_text_seq("GROUP", "AND", advance=False): 8454 return this 8455 8456 this = self._parse_alias(this) 8457 8458 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8459 return self._parse_ordered(lambda: this) 8460 8461 return this 8462 8463 def _parse_pipe_syntax_aggregate_group_order_by( 8464 self, query: exp.Select, group_by_exists: bool = True 8465 ) -> exp.Select: 8466 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8467 aggregates_or_groups, orders = [], [] 8468 for element in expr: 8469 if isinstance(element, exp.Ordered): 8470 this = element.this 8471 if isinstance(this, exp.Alias): 8472 element.set("this", this.args["alias"]) 8473 orders.append(element) 8474 else: 8475 this = element 8476 aggregates_or_groups.append(this) 8477 8478 if group_by_exists: 8479 query.select(*aggregates_or_groups, copy=False).group_by( 8480 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8481 copy=False, 8482 ) 8483 else: 8484 query.select(*aggregates_or_groups, append=False, copy=False) 8485 8486 if orders: 8487 return query.order_by(*orders, append=False, copy=False) 8488 8489 return query 8490 8491 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8492 self._match_text_seq("AGGREGATE") 8493 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8494 8495 if self._match(TokenType.GROUP_BY) or ( 8496 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8497 ): 8498 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8499 8500 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8501 8502 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8503 first_setop = 
self.parse_set_operation(this=query) 8504 if not first_setop: 8505 return None 8506 8507 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8508 expr = self._parse_paren() 8509 return expr.assert_is(exp.Subquery).unnest() if expr else None 8510 8511 first_setop.this.pop() 8512 8513 setops = [ 8514 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8515 *self._parse_csv(_parse_and_unwrap_query), 8516 ] 8517 8518 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8519 with_ = query.args.get("with") 8520 ctes = with_.pop() if with_ else None 8521 8522 if isinstance(first_setop, exp.Union): 8523 query = query.union(*setops, copy=False, **first_setop.args) 8524 elif isinstance(first_setop, exp.Except): 8525 query = query.except_(*setops, copy=False, **first_setop.args) 8526 else: 8527 query = query.intersect(*setops, copy=False, **first_setop.args) 8528 8529 query.set("with", ctes) 8530 8531 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8532 8533 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8534 join = self._parse_join() 8535 if not join: 8536 return None 8537 8538 if isinstance(query, exp.Select): 8539 return query.join(join, copy=False) 8540 8541 return query 8542 8543 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8544 pivots = self._parse_pivots() 8545 if not pivots: 8546 return query 8547 8548 from_ = query.args.get("from") 8549 if from_: 8550 from_.this.set("pivots", pivots) 8551 8552 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8553 8554 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8555 self._match_text_seq("EXTEND") 8556 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8557 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8558 8559 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8560 sample = self._parse_table_sample() 8561 8562 
with_ = query.args.get("with") 8563 if with_: 8564 with_.expressions[-1].this.set("sample", sample) 8565 else: 8566 query.set("sample", sample) 8567 8568 return query 8569 8570 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8571 if isinstance(query, exp.Subquery): 8572 query = exp.select("*").from_(query, copy=False) 8573 8574 if not query.args.get("from"): 8575 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8576 8577 while self._match(TokenType.PIPE_GT): 8578 start = self._curr 8579 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8580 if not parser: 8581 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8582 # keywords, making it tricky to disambiguate them without lookahead. The approach 8583 # here is to try and parse a set operation and if that fails, then try to parse a 8584 # join operator. If that fails as well, then the operator is not supported. 8585 parsed_query = self._parse_pipe_syntax_set_operator(query) 8586 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8587 if not parsed_query: 8588 self._retreat(start) 8589 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8590 break 8591 query = parsed_query 8592 else: 8593 query = parser(self, query) 8594 8595 return query
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone `*` argument, else a VarMap from alternating key/value args."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List = []
    values: t.List = []
    i = 0
    while i < len(args):
        # args come in (key, value) pairs
        keys.append(args[i])
        values.append(args[i + 1])
        i += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback building `expr_type` over a bitwise RHS, with optional ESCAPE."""

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        lhs = this
        rhs = self._parse_bitwise()
        if reverse_args:
            lhs, rhs = rhs, lhs
        node = self.expression(expr_type, this=lhs, expression=rhs)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log node from LOG(...) args, honoring dialect base ordering and LN defaults."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        if not dialect.LOG_BASE_FIRST:
            base, value = value, base
        return exp.Log(this=base, expression=value)

    # Single-argument LOG: some dialects treat it as natural log
    klass = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
    return klass(this=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that wires (value, path[, extras]) args into `expr_type`."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        path = dialect.to_json_path(seq_get(args, 1))
        result = expr_type(this=seq_get(args, 0), expression=path)

        # Only JSONExtract carries extra positional arguments
        extra_args = args[2:]
        if extra_args and expr_type is exp.JSONExtract:
            result.set("expressions", extra_args)

        return result

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a Mod node, parenthesizing binary operands: MOD(a + 1, 7) -> (a + 1) % 7."""

    def _wrap(operand: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrap only binary nodes so precedence is preserved when rendered as `%`
        return exp.Paren(this=operand) if isinstance(operand, exp.Binary) else operand

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array node, recording bracket vs ARRAY() notation where the dialect distinguishes."""
    node = exp_class(expressions=args)

    used_brackets = bracket_kind == TokenType.L_BRACKET
    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        node.set("bracket_notation", used_brackets)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build ConvertTimezone, injecting `default_source_tz` for the two-argument form."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: 
exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 
361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 
TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEQUENCE, 456 TokenType.SINK, 457 TokenType.SOURCE, 458 TokenType.STAGE, 459 TokenType.STORAGE_INTEGRATION, 460 TokenType.STREAMLIT, 461 TokenType.TABLE, 462 TokenType.TAG, 463 TokenType.VIEW, 464 TokenType.WAREHOUSE, 465 } 466 467 CREATABLES = { 468 TokenType.COLUMN, 469 TokenType.CONSTRAINT, 470 TokenType.FOREIGN_KEY, 471 TokenType.FUNCTION, 472 TokenType.INDEX, 473 TokenType.PROCEDURE, 474 *DB_CREATABLES, 475 } 476 477 ALTERABLES = { 478 TokenType.INDEX, 479 TokenType.TABLE, 480 TokenType.VIEW, 481 } 482 483 # Tokens that can represent identifiers 484 ID_VAR_TOKENS = { 485 TokenType.ALL, 486 TokenType.ATTACH, 487 TokenType.VAR, 488 TokenType.ANTI, 489 TokenType.APPLY, 490 TokenType.ASC, 491 TokenType.ASOF, 492 TokenType.AUTO_INCREMENT, 493 TokenType.BEGIN, 494 TokenType.BPCHAR, 495 TokenType.CACHE, 496 TokenType.CASE, 497 TokenType.COLLATE, 498 TokenType.COMMAND, 499 TokenType.COMMENT, 500 TokenType.COMMIT, 501 TokenType.CONSTRAINT, 502 TokenType.COPY, 503 TokenType.CUBE, 504 TokenType.CURRENT_SCHEMA, 505 TokenType.DEFAULT, 506 TokenType.DELETE, 507 TokenType.DESC, 508 TokenType.DESCRIBE, 509 TokenType.DETACH, 510 TokenType.DICTIONARY, 511 TokenType.DIV, 512 TokenType.END, 513 TokenType.EXECUTE, 514 TokenType.EXPORT, 515 TokenType.ESCAPE, 516 TokenType.FALSE, 517 TokenType.FIRST, 518 TokenType.FILTER, 519 TokenType.FINAL, 520 TokenType.FORMAT, 521 TokenType.FULL, 522 TokenType.GET, 523 TokenType.IDENTIFIER, 524 TokenType.IS, 525 TokenType.ISNULL, 526 TokenType.INTERVAL, 527 TokenType.KEEP, 528 TokenType.KILL, 529 TokenType.LEFT, 530 TokenType.LIMIT, 531 TokenType.LOAD, 532 TokenType.MERGE, 533 TokenType.NATURAL, 534 TokenType.NEXT, 
535 TokenType.OFFSET, 536 TokenType.OPERATOR, 537 TokenType.ORDINALITY, 538 TokenType.OVERLAPS, 539 TokenType.OVERWRITE, 540 TokenType.PARTITION, 541 TokenType.PERCENT, 542 TokenType.PIVOT, 543 TokenType.PRAGMA, 544 TokenType.PUT, 545 TokenType.RANGE, 546 TokenType.RECURSIVE, 547 TokenType.REFERENCES, 548 TokenType.REFRESH, 549 TokenType.RENAME, 550 TokenType.REPLACE, 551 TokenType.RIGHT, 552 TokenType.ROLLUP, 553 TokenType.ROW, 554 TokenType.ROWS, 555 TokenType.SEMI, 556 TokenType.SET, 557 TokenType.SETTINGS, 558 TokenType.SHOW, 559 TokenType.TEMPORARY, 560 TokenType.TOP, 561 TokenType.TRUE, 562 TokenType.TRUNCATE, 563 TokenType.UNIQUE, 564 TokenType.UNNEST, 565 TokenType.UNPIVOT, 566 TokenType.UPDATE, 567 TokenType.USE, 568 TokenType.VOLATILE, 569 TokenType.WINDOW, 570 *CREATABLES, 571 *SUBQUERY_PREDICATES, 572 *TYPE_TOKENS, 573 *NO_PAREN_FUNCTIONS, 574 } 575 ID_VAR_TOKENS.remove(TokenType.UNION) 576 577 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 578 TokenType.ANTI, 579 TokenType.APPLY, 580 TokenType.ASOF, 581 TokenType.FULL, 582 TokenType.LEFT, 583 TokenType.LOCK, 584 TokenType.NATURAL, 585 TokenType.RIGHT, 586 TokenType.SEMI, 587 TokenType.WINDOW, 588 } 589 590 ALIAS_TOKENS = ID_VAR_TOKENS 591 592 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 593 594 ARRAY_CONSTRUCTORS = { 595 "ARRAY": exp.Array, 596 "LIST": exp.List, 597 } 598 599 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 600 601 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 602 603 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 604 605 FUNC_TOKENS = { 606 TokenType.COLLATE, 607 TokenType.COMMAND, 608 TokenType.CURRENT_DATE, 609 TokenType.CURRENT_DATETIME, 610 TokenType.CURRENT_SCHEMA, 611 TokenType.CURRENT_TIMESTAMP, 612 TokenType.CURRENT_TIME, 613 TokenType.CURRENT_USER, 614 TokenType.FILTER, 615 TokenType.FIRST, 616 TokenType.FORMAT, 617 TokenType.GET, 618 TokenType.GLOB, 619 TokenType.IDENTIFIER, 620 TokenType.INDEX, 621 TokenType.ISNULL, 622 TokenType.ILIKE, 623 TokenType.INSERT, 
624 TokenType.LIKE, 625 TokenType.MERGE, 626 TokenType.NEXT, 627 TokenType.OFFSET, 628 TokenType.PRIMARY_KEY, 629 TokenType.RANGE, 630 TokenType.REPLACE, 631 TokenType.RLIKE, 632 TokenType.ROW, 633 TokenType.UNNEST, 634 TokenType.VAR, 635 TokenType.LEFT, 636 TokenType.RIGHT, 637 TokenType.SEQUENCE, 638 TokenType.DATE, 639 TokenType.DATETIME, 640 TokenType.TABLE, 641 TokenType.TIMESTAMP, 642 TokenType.TIMESTAMPTZ, 643 TokenType.TRUNCATE, 644 TokenType.WINDOW, 645 TokenType.XOR, 646 *TYPE_TOKENS, 647 *SUBQUERY_PREDICATES, 648 } 649 650 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.AND: exp.And, 652 } 653 654 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.COLON_EQ: exp.PropertyEQ, 656 } 657 658 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 659 TokenType.OR: exp.Or, 660 } 661 662 EQUALITY = { 663 TokenType.EQ: exp.EQ, 664 TokenType.NEQ: exp.NEQ, 665 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 666 } 667 668 COMPARISON = { 669 TokenType.GT: exp.GT, 670 TokenType.GTE: exp.GTE, 671 TokenType.LT: exp.LT, 672 TokenType.LTE: exp.LTE, 673 } 674 675 BITWISE = { 676 TokenType.AMP: exp.BitwiseAnd, 677 TokenType.CARET: exp.BitwiseXor, 678 TokenType.PIPE: exp.BitwiseOr, 679 } 680 681 TERM = { 682 TokenType.DASH: exp.Sub, 683 TokenType.PLUS: exp.Add, 684 TokenType.MOD: exp.Mod, 685 TokenType.COLLATE: exp.Collate, 686 } 687 688 FACTOR = { 689 TokenType.DIV: exp.IntDiv, 690 TokenType.LR_ARROW: exp.Distance, 691 TokenType.SLASH: exp.Div, 692 TokenType.STAR: exp.Mul, 693 } 694 695 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 696 697 TIMES = { 698 TokenType.TIME, 699 TokenType.TIMETZ, 700 } 701 702 TIMESTAMPS = { 703 TokenType.TIMESTAMP, 704 TokenType.TIMESTAMPNTZ, 705 TokenType.TIMESTAMPTZ, 706 TokenType.TIMESTAMPLTZ, 707 *TIMES, 708 } 709 710 SET_OPERATIONS = { 711 TokenType.UNION, 712 TokenType.INTERSECT, 713 TokenType.EXCEPT, 714 } 715 716 JOIN_METHODS = { 717 TokenType.ASOF, 718 TokenType.NATURAL, 719 
TokenType.POSITIONAL, 720 } 721 722 JOIN_SIDES = { 723 TokenType.LEFT, 724 TokenType.RIGHT, 725 TokenType.FULL, 726 } 727 728 JOIN_KINDS = { 729 TokenType.ANTI, 730 TokenType.CROSS, 731 TokenType.INNER, 732 TokenType.OUTER, 733 TokenType.SEMI, 734 TokenType.STRAIGHT_JOIN, 735 } 736 737 JOIN_HINTS: t.Set[str] = set() 738 739 LAMBDAS = { 740 TokenType.ARROW: lambda self, expressions: self.expression( 741 exp.Lambda, 742 this=self._replace_lambda( 743 self._parse_assignment(), 744 expressions, 745 ), 746 expressions=expressions, 747 ), 748 TokenType.FARROW: lambda self, expressions: self.expression( 749 exp.Kwarg, 750 this=exp.var(expressions[0].name), 751 expression=self._parse_assignment(), 752 ), 753 } 754 755 COLUMN_OPERATORS = { 756 TokenType.DOT: None, 757 TokenType.DOTCOLON: lambda self, this, to: self.expression( 758 exp.JSONCast, 759 this=this, 760 to=to, 761 ), 762 TokenType.DCOLON: lambda self, this, to: self.expression( 763 exp.Cast if self.STRICT_CAST else exp.TryCast, 764 this=this, 765 to=to, 766 ), 767 TokenType.ARROW: lambda self, this, path: self.expression( 768 exp.JSONExtract, 769 this=this, 770 expression=self.dialect.to_json_path(path), 771 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 772 ), 773 TokenType.DARROW: lambda self, this, path: self.expression( 774 exp.JSONExtractScalar, 775 this=this, 776 expression=self.dialect.to_json_path(path), 777 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 778 ), 779 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtract, 781 this=this, 782 expression=path, 783 ), 784 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtractScalar, 786 this=this, 787 expression=path, 788 ), 789 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 790 exp.JSONBContains, 791 this=this, 792 expression=key, 793 ), 794 } 795 796 EXPRESSION_PARSERS = { 797 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 798 exp.Column: 
lambda self: self._parse_column(), 799 exp.Condition: lambda self: self._parse_assignment(), 800 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 801 exp.Expression: lambda self: self._parse_expression(), 802 exp.From: lambda self: self._parse_from(joins=True), 803 exp.Group: lambda self: self._parse_group(), 804 exp.Having: lambda self: self._parse_having(), 805 exp.Hint: lambda self: self._parse_hint_body(), 806 exp.Identifier: lambda self: self._parse_id_var(), 807 exp.Join: lambda self: self._parse_join(), 808 exp.Lambda: lambda self: self._parse_lambda(), 809 exp.Lateral: lambda self: self._parse_lateral(), 810 exp.Limit: lambda self: self._parse_limit(), 811 exp.Offset: lambda self: self._parse_offset(), 812 exp.Order: lambda self: self._parse_order(), 813 exp.Ordered: lambda self: self._parse_ordered(), 814 exp.Properties: lambda self: self._parse_properties(), 815 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 816 exp.Qualify: lambda self: self._parse_qualify(), 817 exp.Returning: lambda self: self._parse_returning(), 818 exp.Select: lambda self: self._parse_select(), 819 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 820 exp.Table: lambda self: self._parse_table_parts(), 821 exp.TableAlias: lambda self: self._parse_table_alias(), 822 exp.Tuple: lambda self: self._parse_value(values=False), 823 exp.Whens: lambda self: self._parse_when_matched(), 824 exp.Where: lambda self: self._parse_where(), 825 exp.Window: lambda self: self._parse_named_window(), 826 exp.With: lambda self: self._parse_with(), 827 "JOIN_TYPE": lambda self: self._parse_join_parts(), 828 } 829 830 STATEMENT_PARSERS = { 831 TokenType.ALTER: lambda self: self._parse_alter(), 832 TokenType.ANALYZE: lambda self: self._parse_analyze(), 833 TokenType.BEGIN: lambda self: self._parse_transaction(), 834 TokenType.CACHE: lambda self: self._parse_cache(), 835 TokenType.COMMENT: lambda self: self._parse_comment(), 836 
TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 837 TokenType.COPY: lambda self: self._parse_copy(), 838 TokenType.CREATE: lambda self: self._parse_create(), 839 TokenType.DELETE: lambda self: self._parse_delete(), 840 TokenType.DESC: lambda self: self._parse_describe(), 841 TokenType.DESCRIBE: lambda self: self._parse_describe(), 842 TokenType.DROP: lambda self: self._parse_drop(), 843 TokenType.GRANT: lambda self: self._parse_grant(), 844 TokenType.INSERT: lambda self: self._parse_insert(), 845 TokenType.KILL: lambda self: self._parse_kill(), 846 TokenType.LOAD: lambda self: self._parse_load(), 847 TokenType.MERGE: lambda self: self._parse_merge(), 848 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 849 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 850 TokenType.REFRESH: lambda self: self._parse_refresh(), 851 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 852 TokenType.SET: lambda self: self._parse_set(), 853 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 854 TokenType.UNCACHE: lambda self: self._parse_uncache(), 855 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 856 TokenType.UPDATE: lambda self: self._parse_update(), 857 TokenType.USE: lambda self: self._parse_use(), 858 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 859 } 860 861 UNARY_PARSERS = { 862 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 863 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 864 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 865 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 866 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 867 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 868 } 869 870 STRING_PARSERS = { 871 
TokenType.HEREDOC_STRING: lambda self, token: self.expression( 872 exp.RawString, this=token.text 873 ), 874 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 875 exp.National, this=token.text 876 ), 877 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 878 TokenType.STRING: lambda self, token: self.expression( 879 exp.Literal, this=token.text, is_string=True 880 ), 881 TokenType.UNICODE_STRING: lambda self, token: self.expression( 882 exp.UnicodeString, 883 this=token.text, 884 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 885 ), 886 } 887 888 NUMERIC_PARSERS = { 889 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 890 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 891 TokenType.HEX_STRING: lambda self, token: self.expression( 892 exp.HexString, 893 this=token.text, 894 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 895 ), 896 TokenType.NUMBER: lambda self, token: self.expression( 897 exp.Literal, this=token.text, is_string=False 898 ), 899 } 900 901 PRIMARY_PARSERS = { 902 **STRING_PARSERS, 903 **NUMERIC_PARSERS, 904 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 905 TokenType.NULL: lambda self, _: self.expression(exp.Null), 906 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 907 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 908 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 909 TokenType.STAR: lambda self, _: self._parse_star_ops(), 910 } 911 912 PLACEHOLDER_PARSERS = { 913 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 914 TokenType.PARAMETER: lambda self: self._parse_parameter(), 915 TokenType.COLON: lambda self: ( 916 self.expression(exp.Placeholder, this=self._prev.text) 917 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 918 else None 919 ), 920 } 921 
922 RANGE_PARSERS = { 923 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 924 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 925 TokenType.GLOB: binary_range_parser(exp.Glob), 926 TokenType.ILIKE: binary_range_parser(exp.ILike), 927 TokenType.IN: lambda self, this: self._parse_in(this), 928 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 929 TokenType.IS: lambda self, this: self._parse_is(this), 930 TokenType.LIKE: binary_range_parser(exp.Like), 931 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 932 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 933 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 934 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 935 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 936 } 937 938 PIPE_SYNTAX_TRANSFORM_PARSERS = { 939 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 940 "AS": lambda self, query: self._build_pipe_cte( 941 query, [exp.Star()], self._parse_table_alias() 942 ), 943 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 944 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 945 "ORDER BY": lambda self, query: query.order_by( 946 self._parse_order(), append=False, copy=False 947 ), 948 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 949 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 950 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 951 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 952 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 953 } 954 955 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 956 "ALLOWED_VALUES": lambda self: self.expression( 957 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 958 ), 959 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 960 "AUTO": lambda self: 
self._parse_auto_property(), 961 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 962 "BACKUP": lambda self: self.expression( 963 exp.BackupProperty, this=self._parse_var(any_token=True) 964 ), 965 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 966 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 967 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 968 "CHECKSUM": lambda self: self._parse_checksum(), 969 "CLUSTER BY": lambda self: self._parse_cluster(), 970 "CLUSTERED": lambda self: self._parse_clustered_by(), 971 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 972 exp.CollateProperty, **kwargs 973 ), 974 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 975 "CONTAINS": lambda self: self._parse_contains_property(), 976 "COPY": lambda self: self._parse_copy_property(), 977 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 978 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 979 "DEFINER": lambda self: self._parse_definer(), 980 "DETERMINISTIC": lambda self: self.expression( 981 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 982 ), 983 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 984 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 985 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 986 "DISTKEY": lambda self: self._parse_distkey(), 987 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 988 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 989 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 990 "ENVIRONMENT": lambda self: self.expression( 991 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 992 ), 993 "EXECUTE": lambda self: 
self._parse_property_assignment(exp.ExecuteAsProperty), 994 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 995 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 996 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 997 "FREESPACE": lambda self: self._parse_freespace(), 998 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 999 "HEAP": lambda self: self.expression(exp.HeapProperty), 1000 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1001 "IMMUTABLE": lambda self: self.expression( 1002 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1003 ), 1004 "INHERITS": lambda self: self.expression( 1005 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1006 ), 1007 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1008 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1009 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1010 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1011 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1012 "LIKE": lambda self: self._parse_create_like(), 1013 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1014 "LOCK": lambda self: self._parse_locking(), 1015 "LOCKING": lambda self: self._parse_locking(), 1016 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1017 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1018 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1019 "MODIFIES": lambda self: self._parse_modifies_property(), 1020 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1021 "NO": lambda self: self._parse_no_property(), 1022 "ON": lambda self: self._parse_on_property(), 1023 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1024 "OUTPUT": lambda self: 
self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1025 "PARTITION": lambda self: self._parse_partitioned_of(), 1026 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1027 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1028 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1029 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1030 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1031 "READS": lambda self: self._parse_reads_property(), 1032 "REMOTE": lambda self: self._parse_remote_with_connection(), 1033 "RETURNS": lambda self: self._parse_returns(), 1034 "STRICT": lambda self: self.expression(exp.StrictProperty), 1035 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1036 "ROW": lambda self: self._parse_row(), 1037 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1038 "SAMPLE": lambda self: self.expression( 1039 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1040 ), 1041 "SECURE": lambda self: self.expression(exp.SecureProperty), 1042 "SECURITY": lambda self: self._parse_security(), 1043 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1044 "SETTINGS": lambda self: self._parse_settings_property(), 1045 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1046 "SORTKEY": lambda self: self._parse_sortkey(), 1047 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1048 "STABLE": lambda self: self.expression( 1049 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1050 ), 1051 "STORED": lambda self: self._parse_stored(), 1052 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1053 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1054 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1055 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1056 "TO": lambda self: 
self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Maps a constraint keyword (already consumed by the caller) to a callable that
    # parses the remainder of that column/table constraint. Keys with spaces (e.g.
    # "CHARACTER SET") are matched as multi-word sequences.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> is a column constraint; a bare ON <id> is a property
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        # Parses Iceberg BUCKET(...) / TRUNCATE(...) partition transforms. The keyword
        # that routed us here is in self._prev and selects the expression class.
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    # Maps the keyword following ALTER TABLE <name> to the parser for that action.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }
ALTER_ALTER_PARSERS = { 1178 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1179 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1180 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1181 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1182 } 1183 1184 SCHEMA_UNNAMED_CONSTRAINTS = { 1185 "CHECK", 1186 "EXCLUDE", 1187 "FOREIGN KEY", 1188 "LIKE", 1189 "PERIOD", 1190 "PRIMARY KEY", 1191 "UNIQUE", 1192 "WATERMARK", 1193 "BUCKET", 1194 "TRUNCATE", 1195 } 1196 1197 NO_PAREN_FUNCTION_PARSERS = { 1198 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1199 "CASE": lambda self: self._parse_case(), 1200 "CONNECT_BY_ROOT": lambda self: self.expression( 1201 exp.ConnectByRoot, this=self._parse_column() 1202 ), 1203 "IF": lambda self: self._parse_if(), 1204 } 1205 1206 INVALID_FUNC_NAME_TOKENS = { 1207 TokenType.IDENTIFIER, 1208 TokenType.STRING, 1209 } 1210 1211 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1212 1213 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1214 1215 FUNCTION_PARSERS = { 1216 **{ 1217 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1218 }, 1219 **{ 1220 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1221 }, 1222 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1223 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1224 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1225 "DECODE": lambda self: self._parse_decode(), 1226 "EXTRACT": lambda self: self._parse_extract(), 1227 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1228 "GAP_FILL": lambda self: self._parse_gap_fill(), 1229 "JSON_OBJECT": lambda self: self._parse_json_object(), 1230 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1231 "JSON_TABLE": lambda self: self._parse_json_table(), 1232 "MATCH": lambda self: self._parse_match_against(), 1233 "NORMALIZE": lambda self: 
self._parse_normalize(), 1234 "OPENJSON": lambda self: self._parse_open_json(), 1235 "OVERLAY": lambda self: self._parse_overlay(), 1236 "POSITION": lambda self: self._parse_position(), 1237 "PREDICT": lambda self: self._parse_predict(), 1238 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1239 "STRING_AGG": lambda self: self._parse_string_agg(), 1240 "SUBSTRING": lambda self: self._parse_substring(), 1241 "TRIM": lambda self: self._parse_trim(), 1242 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1243 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1244 "XMLELEMENT": lambda self: self.expression( 1245 exp.XMLElement, 1246 this=self._match_text_seq("NAME") and self._parse_id_var(), 1247 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1248 ), 1249 "XMLTABLE": lambda self: self._parse_xml_table(), 1250 } 1251 1252 QUERY_MODIFIER_PARSERS = { 1253 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1254 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1255 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1256 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1257 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1258 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1259 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1260 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1261 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1262 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1263 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1264 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1265 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1266 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1267 TokenType.USING: lambda self: 
("sample", self._parse_table_sample(as_modifier=True)), 1268 TokenType.CLUSTER_BY: lambda self: ( 1269 "cluster", 1270 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1271 ), 1272 TokenType.DISTRIBUTE_BY: lambda self: ( 1273 "distribute", 1274 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1275 ), 1276 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1277 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1278 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1279 } 1280 1281 SET_PARSERS = { 1282 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1283 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1284 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1285 "TRANSACTION": lambda self: self._parse_set_transaction(), 1286 } 1287 1288 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1289 1290 TYPE_LITERAL_PARSERS = { 1291 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1292 } 1293 1294 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1295 1296 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1297 1298 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1299 1300 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1301 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1302 "ISOLATION": ( 1303 ("LEVEL", "REPEATABLE", "READ"), 1304 ("LEVEL", "READ", "COMMITTED"), 1305 ("LEVEL", "READ", "UNCOMITTED"), 1306 ("LEVEL", "SERIALIZABLE"), 1307 ), 1308 "READ": ("WRITE", "ONLY"), 1309 } 1310 1311 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1312 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1313 ) 1314 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1315 1316 CREATE_SEQUENCE: OPTIONS_TYPE = { 1317 "SCALE": ("EXTEND", "NOEXTEND"), 1318 "SHARD": ("EXTEND", 
"NOEXTEND"), 1319 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1320 **dict.fromkeys( 1321 ( 1322 "SESSION", 1323 "GLOBAL", 1324 "KEEP", 1325 "NOKEEP", 1326 "ORDER", 1327 "NOORDER", 1328 "NOCACHE", 1329 "CYCLE", 1330 "NOCYCLE", 1331 "NOMINVALUE", 1332 "NOMAXVALUE", 1333 "NOSCALE", 1334 "NOSHARD", 1335 ), 1336 tuple(), 1337 ), 1338 } 1339 1340 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1341 1342 USABLES: OPTIONS_TYPE = dict.fromkeys( 1343 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1344 ) 1345 1346 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1347 1348 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1349 "TYPE": ("EVOLUTION",), 1350 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1351 } 1352 1353 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1354 1355 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1356 1357 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1358 "NOT": ("ENFORCED",), 1359 "MATCH": ( 1360 "FULL", 1361 "PARTIAL", 1362 "SIMPLE", 1363 ), 1364 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1365 "USING": ( 1366 "BTREE", 1367 "HASH", 1368 ), 1369 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1370 } 1371 1372 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1373 "NO": ("OTHERS",), 1374 "CURRENT": ("ROW",), 1375 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1376 } 1377 1378 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1379 1380 CLONE_KEYWORDS = {"CLONE", "COPY"} 1381 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1382 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1383 1384 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1385 1386 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1387 1388 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1389 1390 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1391 1392 
    # Tokens allowed as a window alias; ROWS would be ambiguous with the frame clause.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # Tokens allowed as identifiers inside a FETCH clause; ROW/ROWS/PERCENT are keywords there.
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC-style datetime literal prefixes, e.g. {d '2024-01-01'}.
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Maps the keyword following ANALYZE to the parser for that sub-statement.
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    # Whether CAST errors on failure (vs returning NULL, like TRY_CAST).
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether LOG with a single argument means natural log (ln).
    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Local import to avoid a circular dependency between parser and dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all per-parse state so the instance can be reused for a new SQL string."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1592 """ 1593 return self._parse( 1594 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1595 ) 1596 1597 def parse_into( 1598 self, 1599 expression_types: exp.IntoType, 1600 raw_tokens: t.List[Token], 1601 sql: t.Optional[str] = None, 1602 ) -> t.List[t.Optional[exp.Expression]]: 1603 """ 1604 Parses a list of tokens into a given Expression type. If a collection of Expression 1605 types is given instead, this method will try to parse the token list into each one 1606 of them, stopping at the first for which the parsing succeeds. 1607 1608 Args: 1609 expression_types: The expression type(s) to try and parse the token list into. 1610 raw_tokens: The list of tokens. 1611 sql: The original SQL string, used to produce helpful debug messages. 1612 1613 Returns: 1614 The target Expression. 1615 """ 1616 errors = [] 1617 for expression_type in ensure_list(expression_types): 1618 parser = self.EXPRESSION_PARSERS.get(expression_type) 1619 if not parser: 1620 raise TypeError(f"No parser registered for {expression_type}") 1621 1622 try: 1623 return self._parse(parser, raw_tokens, sql) 1624 except ParseError as e: 1625 e.errors[0]["into_expression"] = expression_type 1626 errors.append(e) 1627 1628 raise ParseError( 1629 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1630 errors=merge_errors(errors), 1631 ) from errors[-1] 1632 1633 def _parse( 1634 self, 1635 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1636 raw_tokens: t.List[Token], 1637 sql: t.Optional[str] = None, 1638 ) -> t.List[t.Optional[exp.Expression]]: 1639 self.reset() 1640 self.sql = sql or "" 1641 1642 total = len(raw_tokens) 1643 chunks: t.List[t.List[Token]] = [[]] 1644 1645 for i, token in enumerate(raw_tokens): 1646 if token.token_type == TokenType.SEMICOLON: 1647 if token.comments: 1648 chunks.append([token]) 1649 1650 if i < total - 1: 1651 chunks.append([]) 1652 else: 1653 chunks[-1].append(token) 1654 1655 expressions = [] 1656 1657 for 
tokens in chunks: 1658 self._index = -1 1659 self._tokens = tokens 1660 self._advance() 1661 1662 expressions.append(parse_method(self)) 1663 1664 if self._index < len(self._tokens): 1665 self.raise_error("Invalid expression / Unexpected token") 1666 1667 self.check_errors() 1668 1669 return expressions 1670 1671 def check_errors(self) -> None: 1672 """Logs or raises any found errors, depending on the chosen error level setting.""" 1673 if self.error_level == ErrorLevel.WARN: 1674 for error in self.errors: 1675 logger.error(str(error)) 1676 elif self.error_level == ErrorLevel.RAISE and self.errors: 1677 raise ParseError( 1678 concat_messages(self.errors, self.max_errors), 1679 errors=merge_errors(self.errors), 1680 ) 1681 1682 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1683 """ 1684 Appends an error in the list of recorded errors or raises it, depending on the chosen 1685 error level setting. 1686 """ 1687 token = token or self._curr or self._prev or Token.string("") 1688 start = token.start 1689 end = token.end + 1 1690 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1691 highlight = self.sql[start:end] 1692 end_context = self.sql[end : end + self.error_message_context] 1693 1694 error = ParseError.new( 1695 f"{message}. Line {token.line}, Col: {token.col}.\n" 1696 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1697 description=message, 1698 line=token.line, 1699 col=token.col, 1700 start_context=start_context, 1701 highlight=highlight, 1702 end_context=end_context, 1703 ) 1704 1705 if self.error_level == ErrorLevel.IMMEDIATE: 1706 raise error 1707 1708 self.errors.append(error) 1709 1710 def expression( 1711 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1712 ) -> E: 1713 """ 1714 Creates a new, validated Expression. 1715 1716 Args: 1717 exp_class: The expression class to instantiate. 1718 comments: An optional list of comments to attach to the expression. 
1719 kwargs: The arguments to set for the expression along with their respective values. 1720 1721 Returns: 1722 The target expression. 1723 """ 1724 instance = exp_class(**kwargs) 1725 instance.add_comments(comments) if comments else self._add_comments(instance) 1726 return self.validate_expression(instance) 1727 1728 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1729 if expression and self._prev_comments: 1730 expression.add_comments(self._prev_comments) 1731 self._prev_comments = None 1732 1733 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1734 """ 1735 Validates an Expression, making sure that all its mandatory arguments are set. 1736 1737 Args: 1738 expression: The expression to validate. 1739 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1740 1741 Returns: 1742 The validated expression. 1743 """ 1744 if self.error_level != ErrorLevel.IGNORE: 1745 for error_message in expression.error_messages(args): 1746 self.raise_error(error_message) 1747 1748 return expression 1749 1750 def _find_sql(self, start: Token, end: Token) -> str: 1751 return self.sql[start.start : end.end + 1] 1752 1753 def _is_connected(self) -> bool: 1754 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1755 1756 def _advance(self, times: int = 1) -> None: 1757 self._index += times 1758 self._curr = seq_get(self._tokens, self._index) 1759 self._next = seq_get(self._tokens, self._index + 1) 1760 1761 if self._index > 0: 1762 self._prev = self._tokens[self._index - 1] 1763 self._prev_comments = self._prev.comments 1764 else: 1765 self._prev = None 1766 self._prev_comments = None 1767 1768 def _retreat(self, index: int) -> None: 1769 if index != self._index: 1770 self._advance(index - self._index) 1771 1772 def _warn_unsupported(self) -> None: 1773 if len(self._tokens) <= 1: 1774 return 1775 1776 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1777 # interested in emitting a warning for the one being currently processed. 1778 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1779 1780 logger.warning( 1781 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1782 ) 1783 1784 def _parse_command(self) -> exp.Command: 1785 self._warn_unsupported() 1786 return self.expression( 1787 exp.Command, 1788 comments=self._prev_comments, 1789 this=self._prev.text.upper(), 1790 expression=self._parse_string(), 1791 ) 1792 1793 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1794 """ 1795 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1796 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1797 solve this by setting & resetting the parser state accordingly 1798 """ 1799 index = self._index 1800 error_level = self.error_level 1801 1802 self.error_level = ErrorLevel.IMMEDIATE 1803 try: 1804 this = parse_method() 1805 except ParseError: 1806 this = None 1807 finally: 1808 if not this or retreat: 1809 self._retreat(index) 1810 self.error_level = error_level 1811 1812 return this 1813 1814 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1815 start = self._prev 1816 exists = self._parse_exists() if allow_exists else None 1817 1818 self._match(TokenType.ON) 1819 1820 materialized = self._match_text_seq("MATERIALIZED") 1821 kind = self._match_set(self.CREATABLES) and self._prev 1822 if not kind: 1823 return self._parse_as_command(start) 1824 1825 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1826 this = self._parse_user_defined_function(kind=kind.token_type) 1827 elif kind.token_type == TokenType.TABLE: 1828 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1829 elif kind.token_type == TokenType.COLUMN: 1830 this = self._parse_column() 1831 else: 1832 this 
= self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a (possibly qualified) table name into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-expression actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: a registered statement, a command, or a bare expression/select."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a Command for unknown creatable kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; returns a truthy value only when the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've
matched a statement parser 1956 start = self._prev 1957 1958 replace = ( 1959 start.token_type == TokenType.REPLACE 1960 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1961 or self._match_pair(TokenType.OR, TokenType.ALTER) 1962 ) 1963 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1964 1965 unique = self._match(TokenType.UNIQUE) 1966 1967 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1968 clustered = True 1969 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1970 "COLUMNSTORE" 1971 ): 1972 clustered = False 1973 else: 1974 clustered = None 1975 1976 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1977 self._advance() 1978 1979 properties = None 1980 create_token = self._match_set(self.CREATABLES) and self._prev 1981 1982 if not create_token: 1983 # exp.Properties.Location.POST_CREATE 1984 properties = self._parse_properties() 1985 create_token = self._match_set(self.CREATABLES) and self._prev 1986 1987 if not properties or not create_token: 1988 return self._parse_as_command(start) 1989 1990 concurrently = self._match_text_seq("CONCURRENTLY") 1991 exists = self._parse_exists(not_=True) 1992 this = None 1993 expression: t.Optional[exp.Expression] = None 1994 indexes = None 1995 no_schema_binding = None 1996 begin = None 1997 end = None 1998 clone = None 1999 2000 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2001 nonlocal properties 2002 if properties and temp_props: 2003 properties.expressions.extend(temp_props.expressions) 2004 elif temp_props: 2005 properties = temp_props 2006 2007 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2008 this = self._parse_user_defined_function(kind=create_token.token_type) 2009 2010 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2011 extend_props(self._parse_properties()) 2012 2013 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2014 
extend_props(self._parse_properties()) 2015 2016 if not expression: 2017 if self._match(TokenType.COMMAND): 2018 expression = self._parse_as_command(self._prev) 2019 else: 2020 begin = self._match(TokenType.BEGIN) 2021 return_ = self._match_text_seq("RETURN") 2022 2023 if self._match(TokenType.STRING, advance=False): 2024 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2025 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2026 expression = self._parse_string() 2027 extend_props(self._parse_properties()) 2028 else: 2029 expression = self._parse_user_defined_function_expression() 2030 2031 end = self._match_text_seq("END") 2032 2033 if return_: 2034 expression = self.expression(exp.Return, this=expression) 2035 elif create_token.token_type == TokenType.INDEX: 2036 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2037 if not self._match(TokenType.ON): 2038 index = self._parse_id_var() 2039 anonymous = False 2040 else: 2041 index = None 2042 anonymous = True 2043 2044 this = self._parse_index(index=index, anonymous=anonymous) 2045 elif create_token.token_type in self.DB_CREATABLES: 2046 table_parts = self._parse_table_parts( 2047 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2048 ) 2049 2050 # exp.Properties.Location.POST_NAME 2051 self._match(TokenType.COMMA) 2052 extend_props(self._parse_properties(before=True)) 2053 2054 this = self._parse_schema(this=table_parts) 2055 2056 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2057 extend_props(self._parse_properties()) 2058 2059 has_alias = self._match(TokenType.ALIAS) 2060 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2061 # exp.Properties.Location.POST_ALIAS 2062 extend_props(self._parse_properties()) 2063 2064 if create_token.token_type == TokenType.SEQUENCE: 2065 expression = self._parse_types() 2066 extend_props(self._parse_properties()) 
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options (INCREMENT BY, MINVALUE, CACHE, ...).

        Returns None if no tokens were consumed at all, so the caller can tell
        that no sequence properties were present.
        """
        seq = exp.SequenceProperties()

        options = []
        # Remember the starting position to detect whether anything was parsed
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                # Both "INCREMENT BY n" and "INCREMENT = n" are accepted
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                # Fall back to the generic option list; stop on the first
                # token that is not a known sequence option
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
= exp.SequenceProperties() 2129 2130 options = [] 2131 index = self._index 2132 2133 while self._curr: 2134 self._match(TokenType.COMMA) 2135 if self._match_text_seq("INCREMENT"): 2136 self._match_text_seq("BY") 2137 self._match_text_seq("=") 2138 seq.set("increment", self._parse_term()) 2139 elif self._match_text_seq("MINVALUE"): 2140 seq.set("minvalue", self._parse_term()) 2141 elif self._match_text_seq("MAXVALUE"): 2142 seq.set("maxvalue", self._parse_term()) 2143 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2144 self._match_text_seq("=") 2145 seq.set("start", self._parse_term()) 2146 elif self._match_text_seq("CACHE"): 2147 # T-SQL allows empty CACHE which is initialized dynamically 2148 seq.set("cache", self._parse_number() or True) 2149 elif self._match_text_seq("OWNED", "BY"): 2150 # "OWNED BY NONE" is the default 2151 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2152 else: 2153 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2154 if opt: 2155 options.append(opt) 2156 else: 2157 break 2158 2159 seq.set("options", options if options else None) 2160 return None if self._index == index else seq 2161 2162 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2163 # only used for teradata currently 2164 self._match(TokenType.COMMA) 2165 2166 kwargs = { 2167 "no": self._match_text_seq("NO"), 2168 "dual": self._match_text_seq("DUAL"), 2169 "before": self._match_text_seq("BEFORE"), 2170 "default": self._match_text_seq("DEFAULT"), 2171 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2172 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2173 "after": self._match_text_seq("AFTER"), 2174 "minimum": self._match_texts(("MIN", "MINIMUM")), 2175 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2176 } 2177 2178 if self._match_texts(self.PROPERTY_PARSERS): 2179 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2180 try: 2181 return parser(self, **{k: 
    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property, dispatching to PROPERTY_PARSERS when the
        next token is a known property keyword, otherwise falling back to a
        generic ``key = value`` property or sequence properties."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key = value property: rewind and try sequence properties
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        """Parse STORED BY <handler> or STORED AS <format> (optionally with
        INPUTFORMAT/OUTPUTFORMAT strings)."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )
self._match_text_seq("OUTPUTFORMAT") else None 2229 2230 return self.expression( 2231 exp.FileFormatProperty, 2232 this=( 2233 self.expression( 2234 exp.InputOutputFormat, 2235 input_format=input_format, 2236 output_format=output_format, 2237 ) 2238 if input_format or output_format 2239 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2240 ), 2241 ) 2242 2243 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2244 field = self._parse_field() 2245 if isinstance(field, exp.Identifier) and not field.quoted: 2246 field = exp.var(field) 2247 2248 return field 2249 2250 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2251 self._match(TokenType.EQ) 2252 self._match(TokenType.ALIAS) 2253 2254 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2255 2256 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2257 properties = [] 2258 while True: 2259 if before: 2260 prop = self._parse_property_before() 2261 else: 2262 prop = self._parse_property() 2263 if not prop: 2264 break 2265 for p in ensure_list(prop): 2266 properties.append(p) 2267 2268 if properties: 2269 return self.expression(exp.Properties, expressions=properties) 2270 2271 return None 2272 2273 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2274 return self.expression( 2275 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2276 ) 2277 2278 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2279 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2280 security_specifier = self._prev.text.upper() 2281 return self.expression(exp.SecurityProperty, this=security_specifier) 2282 return None 2283 2284 def _parse_settings_property(self) -> exp.SettingsProperty: 2285 return self.expression( 2286 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2287 ) 2288 2289 def _parse_volatile_property(self) -> 
    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = {ON [(...)] | OFF}."""
        self._match(TokenType.EQ)
        # Default to ON; flipped below if OFF is found
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            # Parse the optional option list, e.g. (HISTORY_TABLE = ..., ...)
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = {ON [(...)] | OFF} with optional
        FILTER_COLUMN / RETENTION_PERIOD options."""
        self._match(TokenType.EQ)
        # ON unless OFF is explicitly given
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY {HASH (cols) | RANDOM} [BUCKETS {n | AUTO}]."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        """Parse ``[KEY] (col, ...)`` into an instance of *expr_type*."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the many forms that can follow a WITH keyword in DDL
        (property lists, JOURNAL, view attributes, DATA, SERDEPROPERTIES,
        schema binding, procedure options, isolated loading, ...)."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()
    def _parse_procedure_option(self) -> exp.Expression | None:
        """Parse one procedure option, e.g. EXECUTE AS {option | 'string'}."""
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Build a [NO] LOG property node."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JOURNAL property node from pre-parsed modifier kwargs."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = {ON | OFF | DEFAULT}."""
        self._match(TokenType.EQ)

        # None means neither ON nor OFF was specified
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY list of ordered expressions; *wrapped* indicates
        whether the list is parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; rewinds and returns None if GRANTS is absent."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = <number> [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either with an explicit = <number> [PERCENT]
        or with the pre-parsed NO/DEFAULT modifiers."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE = <size> [BYTES | KBYTES | KILOBYTES]; the
        DEFAULT/MIN/MAX modifiers are passed in pre-parsed."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT}
        [AUTOTEMP (...)] (Teradata)."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [...]; rewinds and returns
        None when the ISOLATED LOADING keywords are not present."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause:
        LOCKING {TABLE|VIEW|ROW|DATABASE} [<name>] {FOR|IN} <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks don't name an object; the others do
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <expr, ...>; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound:
        IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER m)."""
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are special markers, not ordinary expressions
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>};
        rewinds and returns None when OF is absent."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] {(schema) | <bracketed field>}."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS] (Teradata)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse CONTAINS SQL; returns None if SQL does not follow."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None
    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse MODIFIES SQL DATA; returns None if SQL DATA does not follow."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows a NO keyword: PRIMARY INDEX or SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows an ON keyword: COMMIT {PRESERVE | DELETE} ROWS,
        or a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse READS SQL DATA; returns None if SQL DATA does not follow."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY (<identifier>) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # Malformed option list: bail out entirely
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY (<ids>) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <charset>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )
    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <connection> (BigQuery models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a function RETURNS clause: a scalar type, TABLE [<...>],
        or NULL ON NULL INPUT."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<...>: a struct-like column list
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement, including an optional creatable kind,
        style, FORMAT property, a nested statement or table, and partition."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" token was actually the first part of a dotted table
            # name, so rewind and drop the style
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )
    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse a multi-table INSERT (INSERT {FIRST | ALL} ... SELECT ...),
        collecting the conditional INTO branches followed by the source query."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # Each branch is [WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...]
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (including INSERT OVERWRITE DIRECTORY and
        multi-table inserts).

        NOTE: the keyword arguments in the final self.expression(...) call are
        evaluated in order and each consumes tokens — do not reorder them.
        """
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>'
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )
    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres-style) or ON DUPLICATE KEY (MySQL-style)
        clauses on an INSERT; returns None if neither is present."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Target: either a named constraint or a list of conflict columns
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE SET <assignments>
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of a ROW FORMAT clause; None when absent."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...); rewinds and returns None when
        SERDEPROPERTIES is not present."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '<class>' [...] or DELIMITED
        with its various TERMINATED BY options."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; any other
        LOAD form falls back to an opaque command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement.

        NOTE: the keyword arguments below are evaluated in order and each one
        consumes tokens — do not reorder them.
        """
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        # RETURNING is attempted both here and again after WHERE, so dialects that
        # place it in either position parse correctly
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        # Like _parse_delete: RETURNING may appear before or after the trailing clauses
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        """Parse `USE [<kind>] <name>`, with kind drawn from self.USABLES."""
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`; the TABLE keyword is mandatory."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            # NOTE(review): only a single key/value pair is consumed here
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse `PARTITION (...)` / `SUBPARTITION (...)`; None if neither keyword follows."""
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )
    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        """Parse one VALUES row: a parenthesized tuple or a single bare expression.

        NOTE(review): the `values` parameter is not referenced in this body —
        presumably consumed by dialect overrides; confirm before removing.
        """

        def _parse_value_expression() -> t.Optional[exp.Expression]:
            # Some dialects accept DEFAULT as a value placeholder
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()
    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        """Parse the interior of a parenthesized SELECT/PIVOT/FROM-first construct."""
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query, optionally consuming trailing |> pipe syntax."""
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Core SELECT parser: handles CTEs, the SELECT clause, wrapped selects,
        VALUES, FROM-first, SUMMARIZE, DESCRIBE and STREAM entry points."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following DOT means SELECT is being used as an identifier here
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM was not a function call; give the token back
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        """Parse `SEARCH <kind> FIRST BY ... SET ... [USING ...]` on a recursive CTE."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause, collecting comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )
    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse one CTE (`alias [(cols)] AS (statement)`); retreats and returns None
        when the AS token is missing and the dialect does not make it optional."""
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        # Normalize a bare VALUES body into SELECT * FROM (VALUES ...)
        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] alias [(col, ...)]`; None when neither alias nor columns appear."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, attaching pivots, alias and table sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to earlier FROM aliases into explicit UNNESTs.

        Used by dialects where `FROM t, t.arr` implies UNNEST(t.arr).
        """
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                # Only a join whose first part refers to a known alias is implicit
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE/GROUP/LIMIT/...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT that carried an OFFSET gets split into its own node
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # LIMIT BY expressions move to the Offset node
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume the remaining tokens and keep them as a single raw hint string."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        """Parse one hint entry as a function call; a hook for dialect overrides."""
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse a hint body as CSV of function calls/vars, falling back to raw text
        when parsing fails or tokens remain."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint carried in a comment attached to the HINT token."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None
self._match(TokenType.INTO): 3556 return None 3557 3558 temp = self._match(TokenType.TEMPORARY) 3559 unlogged = self._match_text_seq("UNLOGGED") 3560 self._match(TokenType.TABLE) 3561 3562 return self.expression( 3563 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3564 ) 3565 3566 def _parse_from( 3567 self, 3568 joins: bool = False, 3569 skip_from_token: bool = False, 3570 consume_pipe: bool = False, 3571 ) -> t.Optional[exp.From]: 3572 if not skip_from_token and not self._match(TokenType.FROM): 3573 return None 3574 3575 return self.expression( 3576 exp.From, 3577 comments=self._prev_comments, 3578 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3579 ) 3580 3581 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3582 return self.expression( 3583 exp.MatchRecognizeMeasure, 3584 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3585 this=self._parse_expression(), 3586 ) 3587 3588 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3589 if not self._match(TokenType.MATCH_RECOGNIZE): 3590 return None 3591 3592 self._match_l_paren() 3593 3594 partition = self._parse_partition_by() 3595 order = self._parse_order() 3596 3597 measures = ( 3598 self._parse_csv(self._parse_match_recognize_measure) 3599 if self._match_text_seq("MEASURES") 3600 else None 3601 ) 3602 3603 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3604 rows = exp.var("ONE ROW PER MATCH") 3605 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3606 text = "ALL ROWS PER MATCH" 3607 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3608 text += " SHOW EMPTY MATCHES" 3609 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3610 text += " OMIT EMPTY MATCHES" 3611 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3612 text += " WITH UNMATCHED ROWS" 3613 rows = exp.var(text) 3614 else: 3615 rows = None 3616 3617 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3618 
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a `MATCH_RECOGNIZE ( ... )` clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # Rows-per-match variants are kept verbatim as vars
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured verbatim by balancing parentheses manually
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY.

        `cross_apply` is tri-state: True for CROSS APPLY, False for OUTER APPLY,
        None for plain LATERAL.
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # LATERAL over an UNNEST, function call or bare identifier
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING clause, unwrapping Columns to Identifiers."""
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN (including comma joins and CROSS/OUTER APPLY); None if absent."""
        # A bare comma is an implicit cross join
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # Not actually a join prefix — give the tokens back
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Look ahead for nested joins whose ON/USING belongs to this join
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing parameters of a CREATE INDEX statement."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse a CREATE INDEX body.

        When `index`/`anonymous` is given the name was handled by the caller and
        only the ON <table> part is parsed; otherwise the UNIQUE/PRIMARY/AMP
        prefix and the index name are parsed here.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints; None if neither."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None
    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function, id, string or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly qualified table name (catalog.db.table, deeper nesting via Dot).

        With `is_db_reference`, the parts are shifted so the last one is the db.
        With `wildcard`, a trailing `*` is folded into the final identifier.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery or a plain table
        with its version, sample, alias, hints, pivots and (optionally) joins."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table version clauses (FOR SYSTEM_TIME / FOR VERSION style):
        FROM..TO / BETWEEN..AND / CONTAINED IN (...) / ALL / AS OF."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
self._match_texts(self.HISTORICAL_DATA_PREFIX): 4130 this = self._prev.text.upper() 4131 kind = ( 4132 self._match(TokenType.L_PAREN) 4133 and self._match_texts(self.HISTORICAL_DATA_KIND) 4134 and self._prev.text.upper() 4135 ) 4136 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4137 4138 if expression: 4139 self._match_r_paren() 4140 historical_data = self.expression( 4141 exp.HistoricalData, this=this, kind=kind, expression=expression 4142 ) 4143 else: 4144 self._retreat(index) 4145 4146 return historical_data 4147 4148 def _parse_changes(self) -> t.Optional[exp.Changes]: 4149 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4150 return None 4151 4152 information = self._parse_var(any_token=True) 4153 self._match_r_paren() 4154 4155 return self.expression( 4156 exp.Changes, 4157 information=information, 4158 at_before=self._parse_historical_data(), 4159 end=self._parse_historical_data(), 4160 ) 4161 4162 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4163 if not self._match(TokenType.UNNEST): 4164 return None 4165 4166 expressions = self._parse_wrapped_csv(self._parse_equality) 4167 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4168 4169 alias = self._parse_table_alias() if with_alias else None 4170 4171 if alias: 4172 if self.dialect.UNNEST_COLUMN_ONLY: 4173 if alias.args.get("columns"): 4174 self.raise_error("Unexpected extra column alias in unnest.") 4175 4176 alias.set("columns", [alias.this]) 4177 alias.set("this", None) 4178 4179 columns = alias.args.get("columns") or [] 4180 if offset and len(expressions) < len(columns): 4181 offset = columns.pop() 4182 4183 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4184 self._match(TokenType.ALIAS) 4185 offset = self._parse_id_var( 4186 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4187 ) or exp.to_identifier("offset") 4188 4189 return self.expression(exp.Unnest, expressions=expressions, alias=alias, 
offset=offset) 4190 4191 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4192 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4193 if not is_derived and not ( 4194 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4195 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4196 ): 4197 return None 4198 4199 expressions = self._parse_csv(self._parse_value) 4200 alias = self._parse_table_alias() 4201 4202 if is_derived: 4203 self._match_r_paren() 4204 4205 return self.expression( 4206 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4207 ) 4208 4209 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4210 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4211 as_modifier and self._match_text_seq("USING", "SAMPLE") 4212 ): 4213 return None 4214 4215 bucket_numerator = None 4216 bucket_denominator = None 4217 bucket_field = None 4218 percent = None 4219 size = None 4220 seed = None 4221 4222 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4223 matched_l_paren = self._match(TokenType.L_PAREN) 4224 4225 if self.TABLESAMPLE_CSV: 4226 num = None 4227 expressions = self._parse_csv(self._parse_primary) 4228 else: 4229 expressions = None 4230 num = ( 4231 self._parse_factor() 4232 if self._match(TokenType.NUMBER, advance=False) 4233 else self._parse_primary() or self._parse_placeholder() 4234 ) 4235 4236 if self._match_text_seq("BUCKET"): 4237 bucket_numerator = self._parse_number() 4238 self._match_text_seq("OUT", "OF") 4239 bucket_denominator = bucket_denominator = self._parse_number() 4240 self._match(TokenType.ON) 4241 bucket_field = self._parse_field() 4242 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4243 percent = num 4244 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4245 size = num 4246 else: 4247 percent = num 4248 4249 if matched_l_paren: 4250 self._match_r_paren() 4251 
4252 if self._match(TokenType.L_PAREN): 4253 method = self._parse_var(upper=True) 4254 seed = self._match(TokenType.COMMA) and self._parse_number() 4255 self._match_r_paren() 4256 elif self._match_texts(("SEED", "REPEATABLE")): 4257 seed = self._parse_wrapped(self._parse_number) 4258 4259 if not method and self.DEFAULT_SAMPLING_METHOD: 4260 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4261 4262 return self.expression( 4263 exp.TableSample, 4264 expressions=expressions, 4265 method=method, 4266 bucket_numerator=bucket_numerator, 4267 bucket_denominator=bucket_denominator, 4268 bucket_field=bucket_field, 4269 percent=percent, 4270 size=size, 4271 seed=seed, 4272 ) 4273 4274 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4275 return list(iter(self._parse_pivot, None)) or None 4276 4277 def _parse_joins(self) -> t.Iterator[exp.Join]: 4278 return iter(self._parse_join, None) 4279 4280 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4281 if not self._match(TokenType.INTO): 4282 return None 4283 4284 return self.expression( 4285 exp.UnpivotColumns, 4286 this=self._match_text_seq("NAME") and self._parse_column(), 4287 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4288 ) 4289 4290 # https://duckdb.org/docs/sql/statements/pivot 4291 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4292 def _parse_on() -> t.Optional[exp.Expression]: 4293 this = self._parse_bitwise() 4294 4295 if self._match(TokenType.IN): 4296 # PIVOT ... ON col IN (row_val1, row_val2) 4297 return self._parse_in(this) 4298 if self._match(TokenType.ALIAS, advance=False): 4299 # UNPIVOT ... 
                # ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `<col> IN (<values>)` part of a PIVOT clause, where each
        value may carry an alias (`expr AS alias`) or be `ANY [ORDER BY ...]`.
        """

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    # Bare column aliases collapse to their identifier
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a full PIVOT/UNPIVOT clause; returns None (with the token
        index rewound) when the lookahead doesn't form a valid clause.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...)
as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4435 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4436 for fld_parts_tuple in itertools.product(*all_fields): 4437 fld_parts = list(fld_parts_tuple) 4438 4439 if names and self.PREFIXED_PIVOT_COLUMNS: 4440 # Move the "name" to the front of the list 4441 fld_parts.insert(0, fld_parts.pop(-1)) 4442 4443 columns.append(exp.to_identifier("_".join(fld_parts))) 4444 4445 pivot.set("columns", columns) 4446 4447 return pivot 4448 4449 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4450 return [agg.alias for agg in aggregations if agg.alias] 4451 4452 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4453 if not skip_where_token and not self._match(TokenType.PREWHERE): 4454 return None 4455 4456 return self.expression( 4457 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4458 ) 4459 4460 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4461 if not skip_where_token and not self._match(TokenType.WHERE): 4462 return None 4463 4464 return self.expression( 4465 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4466 ) 4467 4468 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4469 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4470 return None 4471 4472 elements: t.Dict[str, t.Any] = defaultdict(list) 4473 4474 if self._match(TokenType.ALL): 4475 elements["all"] = True 4476 elif self._match(TokenType.DISTINCT): 4477 elements["all"] = False 4478 4479 while True: 4480 index = self._index 4481 4482 elements["expressions"].extend( 4483 self._parse_csv( 4484 lambda: None 4485 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4486 else self._parse_assignment() 4487 ) 4488 ) 4489 4490 before_with_index = self._index 4491 with_prefix = self._match(TokenType.WITH) 4492 4493 
if self._match(TokenType.ROLLUP): 4494 elements["rollup"].append( 4495 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4496 ) 4497 elif self._match(TokenType.CUBE): 4498 elements["cube"].append( 4499 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4500 ) 4501 elif self._match(TokenType.GROUPING_SETS): 4502 elements["grouping_sets"].append( 4503 self.expression( 4504 exp.GroupingSets, 4505 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4506 ) 4507 ) 4508 elif self._match_text_seq("TOTALS"): 4509 elements["totals"] = True # type: ignore 4510 4511 if before_with_index <= self._index <= before_with_index + 1: 4512 self._retreat(before_with_index) 4513 break 4514 4515 if index == self._index: 4516 break 4517 4518 return self.expression(exp.Group, **elements) # type: ignore 4519 4520 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4521 return self.expression( 4522 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4523 ) 4524 4525 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4526 if self._match(TokenType.L_PAREN): 4527 grouping_set = self._parse_csv(self._parse_column) 4528 self._match_r_paren() 4529 return self.expression(exp.Tuple, expressions=grouping_set) 4530 4531 return self._parse_column() 4532 4533 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4534 if not skip_having_token and not self._match(TokenType.HAVING): 4535 return None 4536 return self.expression(exp.Having, this=self._parse_assignment()) 4537 4538 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4539 if not self._match(TokenType.QUALIFY): 4540 return None 4541 return self.expression(exp.Qualify, this=self._parse_assignment()) 4542 4543 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4544 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4545 exp.Prior, this=self._parse_bitwise() 4546 ) 4547 
connect = self._parse_assignment() 4548 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4549 return connect 4550 4551 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4552 if skip_start_token: 4553 start = None 4554 elif self._match(TokenType.START_WITH): 4555 start = self._parse_assignment() 4556 else: 4557 return None 4558 4559 self._match(TokenType.CONNECT_BY) 4560 nocycle = self._match_text_seq("NOCYCLE") 4561 connect = self._parse_connect_with_prior() 4562 4563 if not start and self._match(TokenType.START_WITH): 4564 start = self._parse_assignment() 4565 4566 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4567 4568 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4569 this = self._parse_id_var(any_token=True) 4570 if self._match(TokenType.ALIAS): 4571 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4572 return this 4573 4574 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4575 if self._match_text_seq("INTERPOLATE"): 4576 return self._parse_wrapped_csv(self._parse_name_as_expression) 4577 return None 4578 4579 def _parse_order( 4580 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4581 ) -> t.Optional[exp.Expression]: 4582 siblings = None 4583 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4584 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4585 return this 4586 4587 siblings = True 4588 4589 return self.expression( 4590 exp.Order, 4591 this=this, 4592 expressions=self._parse_csv(self._parse_ordered), 4593 siblings=siblings, 4594 ) 4595 4596 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4597 if not self._match(token): 4598 return None 4599 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4600 4601 def _parse_ordered( 4602 self, parse_method: t.Optional[t.Callable] = None 4603 ) -> t.Optional[exp.Ordered]: 4604 
        # Parses one ORDER BY term: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When NULLS FIRST/LAST wasn't spelled out, derive the effective null
        # ordering from the dialect's NULL_ORDERING policy and the sort direction
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            # ClickHouse WITH FILL [FROM ...] [TO ...] [STEP ...] [INTERPOLATE ...]
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        # Parses [PERCENT] [ROW|ROWS] [ONLY] [WITH TIES] after TOP/FETCH counts
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`) and FETCH FIRST/NEXT clauses;
        also handles MySQL's `LIMIT offset, count` comma form.
        """
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        # Lookahead-only probe: tries to parse LIMIT/OFFSET and always rewinds
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse LIMIT ... BY <exprs>
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse any number of row-locking clauses: FOR UPDATE, FOR SHARE,
        LOCK IN SHARE MODE, with optional OF <tables> and NOWAIT/WAIT/SKIP LOCKED.
        """
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse one UNION/EXCEPT/INTERSECT operator plus its right-hand side,
        including DISTINCT/ALL, BY NAME / CORRESPONDING and join-style side/kind.
        """
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Chain consecutive set operations left-associatively
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist modifiers (e.g. ORDER BY/LIMIT) from the right operand
                # up to the set operation itself
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS, ...), honoring
        a leading NOT via _negate_range.
        """
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL == NOT (x IS NULL)
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Wraps a range predicate in NOT; dialects may override
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of IS: [NOT] DISTINCT FROM, JSON
        predicates, or NULL/TRUE/FALSE-style primaries. Rewinds and returns
        None when nothing valid follows.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # IS [NOT] JSON [VALUE|ARRAY|OBJECT|SCALAR] [WITH|WITHOUT UNIQUE [KEYS]]
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the IN operand: UNNEST(...), a (sub)query or value list in
        parens/brackets, or a bare field.
        """
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL literal/expression, canonicalizing to the
        `INTERVAL '<value>' <unit>` form and folding `+`-chained intervals
        into an exp.Add tree.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # e.g. `SELECT interval IS NULL` — "interval" was a column name
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1' DAY TO HOUR -- the unit becomes a span of two units
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse the bitwise precedence level: operators in self.BITWISE, plus
        string concat (||), NVL-style ?? (DQMARK) and the << / >> shifts,
        all left-associatively."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # a ?? b is canonicalized to COALESCE(a, b)
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse the term precedence level (operators in self.TERM) left-associatively."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse the factor precedence level (operators in self.FACTOR) left-associatively."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A trailing alphabetic DIV token with no RHS was actually an identifier,
            # not an integer-division operator -- unconsume it and bail out
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                # Tag division with the dialect's semantics so it transpiles correctly
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse the exponent precedence level (operators in self.EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if 
self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal (e.g. DATE '2020-01-01'), or fall back
        to a column / identifier when no data type can be consumed."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    # Literal carries a time zone offset, so widen TIMESTAMP to TIMESTAMPTZ
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single data type parameter, e.g. the 38 in DECIMAL(38, 0)."""
        this = self._parse_type()
        if not this:
            return None

        # Bare, unqualified column names used as size params (e.g. VARCHAR(MAX)) become vars
        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        """Build a (possibly dot-qualified) user-defined type from an identifier."""
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a full data type, including nested types (STRUCT/ARRAY/MAP),
        parameters, time zone modifiers and array suffixes. Returns None and
        restores the token cursor when no type is present."""
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier,
exp.Identifier):
                # Re-tokenize the identifier: it may actually be a type keyword in disguise
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # Nullable(T) is unwrapped to T with the nullable flag set
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            # If no string follows, this was a function call (e.g. DATE(...)), not a typed literal
            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one field of a STRUCT<...> definition: an identifier, optional colon
        and its column definition; may also resolve to a bare type."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone when an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if
self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer join marker attached to the column
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a field and wrap a bare Identifier into a Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by ( is being used as a plain identifier here
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path.to.field` VARIANT extraction into a
        JSONExtract, re-applying any trailing ::casts around the result."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

        while casts:
            this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the type operand of a :: cast."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, ::casts, brackets, operators in
        self.COLUMN_OPERATORS) to `this`, restructuring qualified names as needed."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

                # Function calls can be qualified, e.g., x.y.FOO()
                # This converts the final AST to a series of Dots leading to the function call
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                if isinstance(field, (exp.Func, exp.Window)) and this:
                    this = this.transform(
                        lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                    )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the column parts: table -> db, db -> catalog, new field becomes the column
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        """Parse a parenthesized expression: a subquery, a tuple, or a Paren wrapper."""
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (concatenating adjacent strings),
        a leading-dot number like .5, or a parenthesized expression."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier. The
        anonymous_func flag flips the primary/function attempt order."""
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, also accepting the ODBC-style {fn <function>} wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation: no-paren functions, special-cased parsers
        (FUNCTION_PARSERS), subquery predicates, known builders from `functions`,
        or a fallback exp.Anonymous; then parse a trailing window spec."""
        if not self._curr:
            return None

        comments = self._curr.comments
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to convert a positional function arg; identity by default."""
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style function arguments into exp.PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)
5831 transformed.append(e) 5832 5833 return transformed 5834 5835 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5836 return self._parse_statement() 5837 5838 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5839 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5840 5841 def _parse_user_defined_function( 5842 self, kind: t.Optional[TokenType] = None 5843 ) -> t.Optional[exp.Expression]: 5844 this = self._parse_table_parts(schema=True) 5845 5846 if not self._match(TokenType.L_PAREN): 5847 return this 5848 5849 expressions = self._parse_csv(self._parse_function_parameter) 5850 self._match_r_paren() 5851 return self.expression( 5852 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5853 ) 5854 5855 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5856 literal = self._parse_primary() 5857 if literal: 5858 return self.expression(exp.Introducer, this=token.text, expression=literal) 5859 5860 return self._identifier_expression(token) 5861 5862 def _parse_session_parameter(self) -> exp.SessionParameter: 5863 kind = None 5864 this = self._parse_id_var() or self._parse_primary() 5865 5866 if this and self._match(TokenType.DOT): 5867 kind = this.name 5868 this = self._parse_var() or self._parse_primary() 5869 5870 return self.expression(exp.SessionParameter, this=this, kind=kind) 5871 5872 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5873 return self._parse_id_var() 5874 5875 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5876 index = self._index 5877 5878 if self._match(TokenType.L_PAREN): 5879 expressions = t.cast( 5880 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5881 ) 5882 5883 if not self._match(TokenType.R_PAREN): 5884 self._retreat(index) 5885 else: 5886 expressions = [self._parse_lambda_arg()] 5887 5888 if self._match_set(self.LAMBDAS): 5889 return 
self.LAMBDAS[self._prev.token_type](self, expressions) 5890 5891 self._retreat(index) 5892 5893 this: t.Optional[exp.Expression] 5894 5895 if self._match(TokenType.DISTINCT): 5896 this = self.expression( 5897 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5898 ) 5899 else: 5900 this = self._parse_select_or_expression(alias=alias) 5901 5902 return self._parse_limit( 5903 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5904 ) 5905 5906 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5907 index = self._index 5908 if not self._match(TokenType.L_PAREN): 5909 return this 5910 5911 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5912 # expr can be of both types 5913 if self._match_set(self.SELECT_START_TOKENS): 5914 self._retreat(index) 5915 return this 5916 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5917 self._match_r_paren() 5918 return self.expression(exp.Schema, this=this, expressions=args) 5919 5920 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5921 return self._parse_column_def(self._parse_field(any_token=True)) 5922 5923 def _parse_column_def( 5924 self, this: t.Optional[exp.Expression], computed_column: bool = True 5925 ) -> t.Optional[exp.Expression]: 5926 # column defs are not really columns, they're identifiers 5927 if isinstance(this, exp.Column): 5928 this = this.this 5929 5930 if not computed_column: 5931 self._match(TokenType.ALIAS) 5932 5933 kind = self._parse_types(schema=True) 5934 5935 if self._match_text_seq("FOR", "ORDINALITY"): 5936 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5937 5938 constraints: t.List[exp.Expression] = [] 5939 5940 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5941 ("ALIAS", "MATERIALIZED") 5942 ): 5943 persisted = self._prev.text.upper() == "MATERIALIZED" 5944 constraint_kind = 
exp.ComputedColumnConstraint( 5945 this=self._parse_assignment(), 5946 persisted=persisted or self._match_text_seq("PERSISTED"), 5947 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5948 ) 5949 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5950 elif ( 5951 kind 5952 and self._match(TokenType.ALIAS, advance=False) 5953 and ( 5954 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5955 or (self._next and self._next.token_type == TokenType.L_PAREN) 5956 ) 5957 ): 5958 self._advance() 5959 constraints.append( 5960 self.expression( 5961 exp.ColumnConstraint, 5962 kind=exp.ComputedColumnConstraint( 5963 this=self._parse_disjunction(), 5964 persisted=self._match_texts(("STORED", "VIRTUAL")) 5965 and self._prev.text.upper() == "STORED", 5966 ), 5967 ) 5968 ) 5969 5970 while True: 5971 constraint = self._parse_column_constraint() 5972 if not constraint: 5973 break 5974 constraints.append(constraint) 5975 5976 if not kind and not constraints: 5977 return this 5978 5979 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5980 5981 def _parse_auto_increment( 5982 self, 5983 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5984 start = None 5985 increment = None 5986 order = None 5987 5988 if self._match(TokenType.L_PAREN, advance=False): 5989 args = self._parse_wrapped_csv(self._parse_bitwise) 5990 start = seq_get(args, 0) 5991 increment = seq_get(args, 1) 5992 elif self._match_text_seq("START"): 5993 start = self._parse_bitwise() 5994 self._match_text_seq("INCREMENT") 5995 increment = self._parse_bitwise() 5996 if self._match_text_seq("ORDER"): 5997 order = True 5998 elif self._match_text_seq("NOORDER"): 5999 order = False 6000 6001 if start and increment: 6002 return exp.GeneratedAsIdentityColumnConstraint( 6003 start=start, increment=increment, this=False, order=order 6004 ) 6005 6006 return exp.AutoIncrementColumnConstraint() 6007 6008 def _parse_auto_property(self) -> 
    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse `AUTO REFRESH <value>`; rewinds the AUTO token when REFRESH does not follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse COMPRESS with either a wrapped list of expressions or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse `GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS ...` column constraints.

        Handles the ROW START/END (system-versioning) form, the IDENTITY form with its
        wrapped sequence options (START WITH / INCREMENT BY / MINVALUE / MAXVALUE /
        [NO] CYCLE), and the computed-expression form.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # Without IDENTITY the parenthesized content is a generation expression
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse `INLINE [LENGTH] <expr>`."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following a consumed NOT token (NULL / CASESPECIFIC /
        FOR REPLICATION), or rewind and return None when none applies."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS dispatch."""
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # Don't mistake a T-SQL procedure option (WITH <option>) for a WITH-based constraint
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint, named (`CONSTRAINT <name> ...`) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or bare function calls) until none match."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the next constraint keyword.

        Args:
            constraints: Optional whitelist of keywords to accept; defaults to all of
                CONSTRAINT_PARSERS.
        """
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        """Parse the identifier naming a unique key (keywords not accepted as names)."""
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse `UNIQUE [KEY] [NULLS NOT DISTINCT] [<name>(cols)] [USING <type>] ...`."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options into plain strings.

        Handles `ON <event> {NO ACTION | CASCADE | RESTRICT | SET NULL | SET DEFAULT}`
        plus any keywords listed in KEY_CONSTRAINT_OPTIONS; stops at the first token
        that matches neither.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match=False` assumes the keyword was consumed.

        The referenced columns are captured inside `this` via `_parse_table(schema=True)`,
        so `expressions` is intentionally left as None.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse `FOREIGN KEY [(cols)] REFERENCES ... [ON DELETE|UPDATE <action>]...`."""
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-keyword actions such as CASCADE / RESTRICT
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one PRIMARY KEY column, allowing an ordering qualifier."""
        return self._parse_ordered() or self._parse_field()
    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse `PERIOD FOR SYSTEM_TIME (start_col, end_col)`; rewinds when the
        FOR token is not followed by SYSTEM_TIME."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint or, when a wrapped column
        list follows (or when parsing properties), as a table-level key."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one element inside brackets/braces: an assignment with an optional
        explicit alias, possibly sliced with `:`."""
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` following `this`: subscripts, array/struct literals,
        ODBC datetime literals, and dialect-specific array constructors.

        Recurses at the end so chained subscripts like `x[0][1]` are consumed.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            # Bare bracket literal, e.g. `[1, 2, 3]`
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Plain subscript access; normalize indices for the dialect's base offset
            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:<expr>` slice suffix after `this`."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression (both simple and searched forms)."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `ELSE interval END` may have been mis-parsed as an INTERVAL whose unit is
            # the END keyword; recover by treating it as a column named "interval"
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as either a function call `IF(...)` or the keyword form
        `IF <cond> THEN <expr> [ELSE <expr>] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # Some dialects treat a leading IF as a command (e.g. procedural IF blocks)
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse `NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]`; rewinds when
        NEXT is not followed by VALUE FOR."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT arguments, accepting both `unit FROM expr` and `unit, expr`."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL arguments: `[TABLE] <table>, <lambda args...>`."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of CAST/TRY_CAST: `<expr> AS <type> [DEFAULT ... ON CONVERSION ERROR]
        [FORMAT <fmt> [AT TIME ZONE ...]]`.

        When a FORMAT string targets a temporal type, the cast is rewritten into
        StrToDate/StrToTime with the format translated through the dialect's mappings.

        Args:
            strict: Build exp.Cast when True, exp.TryCast otherwise.
            safe: Propagated to the resulting node's `safe` arg.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-arg form casts to a type spelled as a string
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name: treat as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG / GROUP_CONCAT-style arguments into exp.GroupConcat,
        including DISTINCT, ON OVERFLOW behavior, trailing ORDER BY/LIMIT, and the
        WITHIN GROUP form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: `expr USING charset` or `expr, type`."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE arguments: optional XMLNAMESPACES, the row pattern string,
        PASSING columns, and a COLUMNS clause."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )
    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        """Parse the comma-separated namespace list inside XMLNAMESPACES(...)."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: also match when both sides are NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd trailing argument is the default branch
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] <key> {:|,|VALUE} <value>` pair for JSON_OBJECT-style calls."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when followed by `FORMAT JSON`."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        """Parse the EMPTY/ERROR/NULL handling clauses of JSON accessor functions."""
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        """Parse one `<value> ON <on>` or `DEFAULT <expr> ON <on>` clause.

        Returns the matched value as a string, the DEFAULT expression, or None
        (with the cursor rewound) when nothing matches.
        """
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments: `*` or key-value pairs, plus
        NULL handling, [WITH|WITHOUT] UNIQUE KEYS, RETURNING, and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS clause, including
        the NESTED [PATH] form."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a `COLUMNS (<json column defs>)` clause."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )
    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE arguments: source expression, optional path string,
        ERROR/EMPTY handling, and the COLUMNS schema."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL `MATCH (cols) AGAINST ('expr' [modifier])`."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON arguments, including the optional WITH column list."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `name type ['path'] [AS JSON]` entry of the WITH clause
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments, accepting both the `substr IN str`
        form and the comma-separated form (argument order per `haystack_first`)."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT-style arguments: `MODEL <table>, TABLE <table> [, params]`."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into exp.JoinHint named after the hint function."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM: default the start position to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or pattern-first dialects) the first operand is the trim
            # characters, so swap it with the target string
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a `WINDOW <name> AS (...), ...` clause into named window expressions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition (`<name> AS (<spec>)`)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` when followed by `IGNORE NULLS` / `RESPECT NULLS`."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in HavingMax when followed by `HAVING {MAX | MIN} <column>`."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # True for MAX (also the default when neither keyword matched), False for MIN
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
6939 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6940 if self._match_text_seq("WITHIN", "GROUP"): 6941 order = self._parse_wrapped(self._parse_order) 6942 this = self.expression(exp.WithinGroup, this=this, expression=order) 6943 6944 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6945 self._match(TokenType.WHERE) 6946 this = self.expression( 6947 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6948 ) 6949 self._match_r_paren() 6950 6951 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6952 # Some dialects choose to implement and some do not. 6953 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6954 6955 # There is some code above in _parse_lambda that handles 6956 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6957 6958 # The below changes handle 6959 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6960 6961 # Oracle allows both formats 6962 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6963 # and Snowflake chose to do the same for familiarity 6964 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6965 if isinstance(this, exp.AggFunc): 6966 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6967 6968 if ignore_respect and ignore_respect is not this: 6969 ignore_respect.replace(ignore_respect.this) 6970 this = self.expression(ignore_respect.__class__, this=this) 6971 6972 this = self._parse_respect_or_ignore_nulls(this) 6973 6974 # bigquery select from window x AS (partition by ...) 
6975 if alias: 6976 over = None 6977 self._match(TokenType.ALIAS) 6978 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6979 return this 6980 else: 6981 over = self._prev.text.upper() 6982 6983 if comments and isinstance(func, exp.Expression): 6984 func.pop_comments() 6985 6986 if not self._match(TokenType.L_PAREN): 6987 return self.expression( 6988 exp.Window, 6989 comments=comments, 6990 this=this, 6991 alias=self._parse_id_var(False), 6992 over=over, 6993 ) 6994 6995 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6996 6997 first = self._match(TokenType.FIRST) 6998 if self._match_text_seq("LAST"): 6999 first = False 7000 7001 partition, order = self._parse_partition_and_order() 7002 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7003 7004 if kind: 7005 self._match(TokenType.BETWEEN) 7006 start = self._parse_window_spec() 7007 self._match(TokenType.AND) 7008 end = self._parse_window_spec() 7009 exclude = ( 7010 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7011 if self._match_text_seq("EXCLUDE") 7012 else None 7013 ) 7014 7015 spec = self.expression( 7016 exp.WindowSpec, 7017 kind=kind, 7018 start=start["value"], 7019 start_side=start["side"], 7020 end=end["value"], 7021 end_side=end["side"], 7022 exclude=exclude, 7023 ) 7024 else: 7025 spec = None 7026 7027 self._match_r_paren() 7028 7029 window = self.expression( 7030 exp.Window, 7031 comments=comments, 7032 this=this, 7033 partition_by=partition, 7034 order=order, 7035 spec=spec, 7036 alias=window_alias, 7037 over=over, 7038 first=first, 7039 ) 7040 7041 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
7042 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7043 return self._parse_window(window, alias=alias) 7044 7045 return window 7046 7047 def _parse_partition_and_order( 7048 self, 7049 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7050 return self._parse_partition_by(), self._parse_order() 7051 7052 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7053 self._match(TokenType.BETWEEN) 7054 7055 return { 7056 "value": ( 7057 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7058 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7059 or self._parse_bitwise() 7060 ), 7061 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7062 } 7063 7064 def _parse_alias( 7065 self, this: t.Optional[exp.Expression], explicit: bool = False 7066 ) -> t.Optional[exp.Expression]: 7067 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7068 # so this section tries to parse the clause version and if it fails, it treats the token 7069 # as an identifier (alias) 7070 if self._can_parse_limit_or_offset(): 7071 return this 7072 7073 any_token = self._match(TokenType.ALIAS) 7074 comments = self._prev_comments or [] 7075 7076 if explicit and not any_token: 7077 return this 7078 7079 if self._match(TokenType.L_PAREN): 7080 aliases = self.expression( 7081 exp.Aliases, 7082 comments=comments, 7083 this=this, 7084 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7085 ) 7086 self._match_r_paren(aliases) 7087 return aliases 7088 7089 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7090 self.STRING_ALIASES and self._parse_string_as_identifier() 7091 ) 7092 7093 if alias: 7094 comments.extend(alias.pop_comments()) 7095 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7096 column = this.this 7097 7098 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7099 if not this.comments and 
column and column.comments: 7100 this.comments = column.pop_comments() 7101 7102 return this 7103 7104 def _parse_id_var( 7105 self, 7106 any_token: bool = True, 7107 tokens: t.Optional[t.Collection[TokenType]] = None, 7108 ) -> t.Optional[exp.Expression]: 7109 expression = self._parse_identifier() 7110 if not expression and ( 7111 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7112 ): 7113 quoted = self._prev.token_type == TokenType.STRING 7114 expression = self._identifier_expression(quoted=quoted) 7115 7116 return expression 7117 7118 def _parse_string(self) -> t.Optional[exp.Expression]: 7119 if self._match_set(self.STRING_PARSERS): 7120 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7121 return self._parse_placeholder() 7122 7123 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7124 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7125 if output: 7126 output.update_positions(self._prev) 7127 return output 7128 7129 def _parse_number(self) -> t.Optional[exp.Expression]: 7130 if self._match_set(self.NUMERIC_PARSERS): 7131 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7132 return self._parse_placeholder() 7133 7134 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7135 if self._match(TokenType.IDENTIFIER): 7136 return self._identifier_expression(quoted=True) 7137 return self._parse_placeholder() 7138 7139 def _parse_var( 7140 self, 7141 any_token: bool = False, 7142 tokens: t.Optional[t.Collection[TokenType]] = None, 7143 upper: bool = False, 7144 ) -> t.Optional[exp.Expression]: 7145 if ( 7146 (any_token and self._advance_any()) 7147 or self._match(TokenType.VAR) 7148 or (self._match_set(tokens) if tokens else False) 7149 ): 7150 return self.expression( 7151 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7152 ) 7153 return self._parse_placeholder() 7154 7155 def _advance_any(self, 
ignore_reserved: bool = False) -> t.Optional[Token]: 7156 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7157 self._advance() 7158 return self._prev 7159 return None 7160 7161 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7162 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7163 7164 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7165 return self._parse_primary() or self._parse_var(any_token=True) 7166 7167 def _parse_null(self) -> t.Optional[exp.Expression]: 7168 if self._match_set(self.NULL_TOKENS): 7169 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7170 return self._parse_placeholder() 7171 7172 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7173 if self._match(TokenType.TRUE): 7174 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7175 if self._match(TokenType.FALSE): 7176 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7177 return self._parse_placeholder() 7178 7179 def _parse_star(self) -> t.Optional[exp.Expression]: 7180 if self._match(TokenType.STAR): 7181 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7182 return self._parse_placeholder() 7183 7184 def _parse_parameter(self) -> exp.Parameter: 7185 this = self._parse_identifier() or self._parse_primary_or_var() 7186 return self.expression(exp.Parameter, this=this) 7187 7188 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7189 if self._match_set(self.PLACEHOLDER_PARSERS): 7190 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7191 if placeholder: 7192 return placeholder 7193 self._advance(-1) 7194 return None 7195 7196 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7197 if not self._match_texts(keywords): 7198 return None 7199 if self._match(TokenType.L_PAREN, advance=False): 7200 return self._parse_wrapped_csv(self._parse_expression) 7201 7202 expression = 
self._parse_expression() 7203 return [expression] if expression else None 7204 7205 def _parse_csv( 7206 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7207 ) -> t.List[exp.Expression]: 7208 parse_result = parse_method() 7209 items = [parse_result] if parse_result is not None else [] 7210 7211 while self._match(sep): 7212 self._add_comments(parse_result) 7213 parse_result = parse_method() 7214 if parse_result is not None: 7215 items.append(parse_result) 7216 7217 return items 7218 7219 def _parse_tokens( 7220 self, parse_method: t.Callable, expressions: t.Dict 7221 ) -> t.Optional[exp.Expression]: 7222 this = parse_method() 7223 7224 while self._match_set(expressions): 7225 this = self.expression( 7226 expressions[self._prev.token_type], 7227 this=this, 7228 comments=self._prev_comments, 7229 expression=parse_method(), 7230 ) 7231 7232 return this 7233 7234 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7235 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7236 7237 def _parse_wrapped_csv( 7238 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7239 ) -> t.List[exp.Expression]: 7240 return self._parse_wrapped( 7241 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7242 ) 7243 7244 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7245 wrapped = self._match(TokenType.L_PAREN) 7246 if not wrapped and not optional: 7247 self.raise_error("Expecting (") 7248 parse_result = parse_method() 7249 if wrapped: 7250 self._match_r_paren() 7251 return parse_result 7252 7253 def _parse_expressions(self) -> t.List[exp.Expression]: 7254 return self._parse_csv(self._parse_expression) 7255 7256 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7257 return self._parse_select() or self._parse_set_operations( 7258 self._parse_alias(self._parse_assignment(), explicit=True) 7259 if alias 
7260 else self._parse_assignment() 7261 ) 7262 7263 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7264 return self._parse_query_modifiers( 7265 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7266 ) 7267 7268 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7269 this = None 7270 if self._match_texts(self.TRANSACTION_KIND): 7271 this = self._prev.text 7272 7273 self._match_texts(("TRANSACTION", "WORK")) 7274 7275 modes = [] 7276 while True: 7277 mode = [] 7278 while self._match(TokenType.VAR): 7279 mode.append(self._prev.text) 7280 7281 if mode: 7282 modes.append(" ".join(mode)) 7283 if not self._match(TokenType.COMMA): 7284 break 7285 7286 return self.expression(exp.Transaction, this=this, modes=modes) 7287 7288 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7289 chain = None 7290 savepoint = None 7291 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7292 7293 self._match_texts(("TRANSACTION", "WORK")) 7294 7295 if self._match_text_seq("TO"): 7296 self._match_text_seq("SAVEPOINT") 7297 savepoint = self._parse_id_var() 7298 7299 if self._match(TokenType.AND): 7300 chain = not self._match_text_seq("NO") 7301 self._match_text_seq("CHAIN") 7302 7303 if is_rollback: 7304 return self.expression(exp.Rollback, savepoint=savepoint) 7305 7306 return self.expression(exp.Commit, chain=chain) 7307 7308 def _parse_refresh(self) -> exp.Refresh: 7309 self._match(TokenType.TABLE) 7310 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7311 7312 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7313 if not self._prev.text.upper() == "ADD": 7314 return None 7315 7316 start = self._index 7317 self._match(TokenType.COLUMN) 7318 7319 exists_column = self._parse_exists(not_=True) 7320 expression = self._parse_field_def() 7321 7322 if not isinstance(expression, exp.ColumnDef): 7323 self._retreat(start) 7324 return None 7325 7326 expression.set("exists", 
exists_column) 7327 7328 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7329 if self._match_texts(("FIRST", "AFTER")): 7330 position = self._prev.text 7331 column_position = self.expression( 7332 exp.ColumnPosition, this=self._parse_column(), position=position 7333 ) 7334 expression.set("position", column_position) 7335 7336 return expression 7337 7338 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7339 drop = self._match(TokenType.DROP) and self._parse_drop() 7340 if drop and not isinstance(drop, exp.Command): 7341 drop.set("kind", drop.args.get("kind", "COLUMN")) 7342 return drop 7343 7344 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7345 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7346 return self.expression( 7347 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7348 ) 7349 7350 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7351 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7352 self._match_text_seq("ADD") 7353 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7354 return self.expression( 7355 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7356 ) 7357 7358 column_def = self._parse_add_column() 7359 if isinstance(column_def, exp.ColumnDef): 7360 return column_def 7361 7362 exists = self._parse_exists(not_=True) 7363 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7364 return self.expression( 7365 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7366 ) 7367 7368 return None 7369 7370 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7371 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7372 or self._match_text_seq("COLUMNS") 7373 ): 7374 schema = self._parse_schema() 7375 7376 return ensure_list(schema) if schema else 
self._parse_csv(self._parse_field_def) 7377 7378 return self._parse_csv(_parse_add_alteration) 7379 7380 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7381 if self._match_texts(self.ALTER_ALTER_PARSERS): 7382 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7383 7384 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7385 # keyword after ALTER we default to parsing this statement 7386 self._match(TokenType.COLUMN) 7387 column = self._parse_field(any_token=True) 7388 7389 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7390 return self.expression(exp.AlterColumn, this=column, drop=True) 7391 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7392 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7393 if self._match(TokenType.COMMENT): 7394 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7395 if self._match_text_seq("DROP", "NOT", "NULL"): 7396 return self.expression( 7397 exp.AlterColumn, 7398 this=column, 7399 drop=True, 7400 allow_null=True, 7401 ) 7402 if self._match_text_seq("SET", "NOT", "NULL"): 7403 return self.expression( 7404 exp.AlterColumn, 7405 this=column, 7406 allow_null=False, 7407 ) 7408 7409 if self._match_text_seq("SET", "VISIBLE"): 7410 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7411 if self._match_text_seq("SET", "INVISIBLE"): 7412 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7413 7414 self._match_text_seq("SET", "DATA") 7415 self._match_text_seq("TYPE") 7416 return self.expression( 7417 exp.AlterColumn, 7418 this=column, 7419 dtype=self._parse_types(), 7420 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7421 using=self._match(TokenType.USING) and self._parse_assignment(), 7422 ) 7423 7424 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7425 if self._match_texts(("ALL", "EVEN", "AUTO")): 7426 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7427 7428 self._match_text_seq("KEY", "DISTKEY") 7429 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7430 7431 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7432 if compound: 7433 self._match_text_seq("SORTKEY") 7434 7435 if self._match(TokenType.L_PAREN, advance=False): 7436 return self.expression( 7437 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7438 ) 7439 7440 self._match_texts(("AUTO", "NONE")) 7441 return self.expression( 7442 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7443 ) 7444 7445 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7446 index = self._index - 1 7447 7448 partition_exists = self._parse_exists() 7449 if self._match(TokenType.PARTITION, advance=False): 7450 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7451 7452 self._retreat(index) 7453 return self._parse_csv(self._parse_drop_column) 7454 7455 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7456 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7457 exists = self._parse_exists() 7458 old_column = self._parse_column() 7459 to = self._match_text_seq("TO") 7460 new_column = self._parse_column() 7461 7462 if old_column is None or to is None or new_column is None: 7463 return None 7464 7465 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7466 7467 self._match_text_seq("TO") 7468 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7469 7470 def _parse_alter_table_set(self) -> exp.AlterSet: 7471 alter_set = self.expression(exp.AlterSet) 7472 7473 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7474 "TABLE", "PROPERTIES" 7475 ): 7476 alter_set.set("expressions", 
self._parse_wrapped_csv(self._parse_assignment)) 7477 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7478 alter_set.set("expressions", [self._parse_assignment()]) 7479 elif self._match_texts(("LOGGED", "UNLOGGED")): 7480 alter_set.set("option", exp.var(self._prev.text.upper())) 7481 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7482 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7483 elif self._match_text_seq("LOCATION"): 7484 alter_set.set("location", self._parse_field()) 7485 elif self._match_text_seq("ACCESS", "METHOD"): 7486 alter_set.set("access_method", self._parse_field()) 7487 elif self._match_text_seq("TABLESPACE"): 7488 alter_set.set("tablespace", self._parse_field()) 7489 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7490 alter_set.set("file_format", [self._parse_field()]) 7491 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7492 alter_set.set("file_format", self._parse_wrapped_options()) 7493 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7494 alter_set.set("copy_options", self._parse_wrapped_options()) 7495 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7496 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7497 else: 7498 if self._match_text_seq("SERDE"): 7499 alter_set.set("serde", self._parse_field()) 7500 7501 properties = self._parse_wrapped(self._parse_properties, optional=True) 7502 alter_set.set("expressions", [properties]) 7503 7504 return alter_set 7505 7506 def _parse_alter(self) -> exp.Alter | exp.Command: 7507 start = self._prev 7508 7509 alter_token = self._match_set(self.ALTERABLES) and self._prev 7510 if not alter_token: 7511 return self._parse_as_command(start) 7512 7513 exists = self._parse_exists() 7514 only = self._match_text_seq("ONLY") 7515 this = self._parse_table(schema=True) 7516 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7517 7518 if self._next: 7519 
self._advance() 7520 7521 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7522 if parser: 7523 actions = ensure_list(parser(self)) 7524 not_valid = self._match_text_seq("NOT", "VALID") 7525 options = self._parse_csv(self._parse_property) 7526 7527 if not self._curr and actions: 7528 return self.expression( 7529 exp.Alter, 7530 this=this, 7531 kind=alter_token.text.upper(), 7532 exists=exists, 7533 actions=actions, 7534 only=only, 7535 options=options, 7536 cluster=cluster, 7537 not_valid=not_valid, 7538 ) 7539 7540 return self._parse_as_command(start) 7541 7542 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7543 start = self._prev 7544 # https://duckdb.org/docs/sql/statements/analyze 7545 if not self._curr: 7546 return self.expression(exp.Analyze) 7547 7548 options = [] 7549 while self._match_texts(self.ANALYZE_STYLES): 7550 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7551 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7552 else: 7553 options.append(self._prev.text.upper()) 7554 7555 this: t.Optional[exp.Expression] = None 7556 inner_expression: t.Optional[exp.Expression] = None 7557 7558 kind = self._curr and self._curr.text.upper() 7559 7560 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7561 this = self._parse_table_parts() 7562 elif self._match_text_seq("TABLES"): 7563 if self._match_set((TokenType.FROM, TokenType.IN)): 7564 kind = f"{kind} {self._prev.text.upper()}" 7565 this = self._parse_table(schema=True, is_db_reference=True) 7566 elif self._match_text_seq("DATABASE"): 7567 this = self._parse_table(schema=True, is_db_reference=True) 7568 elif self._match_text_seq("CLUSTER"): 7569 this = self._parse_table() 7570 # Try matching inner expr keywords before fallback to parse table. 
7571 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7572 kind = None 7573 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7574 else: 7575 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7576 kind = None 7577 this = self._parse_table_parts() 7578 7579 partition = self._try_parse(self._parse_partition) 7580 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7581 return self._parse_as_command(start) 7582 7583 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7584 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7585 "WITH", "ASYNC", "MODE" 7586 ): 7587 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7588 else: 7589 mode = None 7590 7591 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7592 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7593 7594 properties = self._parse_properties() 7595 return self.expression( 7596 exp.Analyze, 7597 kind=kind, 7598 this=this, 7599 mode=mode, 7600 partition=partition, 7601 properties=properties, 7602 expression=inner_expression, 7603 options=options, 7604 ) 7605 7606 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7607 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7608 this = None 7609 kind = self._prev.text.upper() 7610 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7611 expressions = [] 7612 7613 if not self._match_text_seq("STATISTICS"): 7614 self.raise_error("Expecting token STATISTICS") 7615 7616 if self._match_text_seq("NOSCAN"): 7617 this = "NOSCAN" 7618 elif self._match(TokenType.FOR): 7619 if self._match_text_seq("ALL", "COLUMNS"): 7620 this = "FOR ALL COLUMNS" 7621 if self._match_texts("COLUMNS"): 7622 this = "FOR COLUMNS" 7623 expressions = self._parse_csv(self._parse_column_reference) 7624 elif self._match_text_seq("SAMPLE"): 7625 sample = self._parse_number() 
7626 expressions = [ 7627 self.expression( 7628 exp.AnalyzeSample, 7629 sample=sample, 7630 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7631 ) 7632 ] 7633 7634 return self.expression( 7635 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7636 ) 7637 7638 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7639 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7640 kind = None 7641 this = None 7642 expression: t.Optional[exp.Expression] = None 7643 if self._match_text_seq("REF", "UPDATE"): 7644 kind = "REF" 7645 this = "UPDATE" 7646 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7647 this = "UPDATE SET DANGLING TO NULL" 7648 elif self._match_text_seq("STRUCTURE"): 7649 kind = "STRUCTURE" 7650 if self._match_text_seq("CASCADE", "FAST"): 7651 this = "CASCADE FAST" 7652 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7653 ("ONLINE", "OFFLINE") 7654 ): 7655 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7656 expression = self._parse_into() 7657 7658 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7659 7660 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7661 this = self._prev.text.upper() 7662 if self._match_text_seq("COLUMNS"): 7663 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7664 return None 7665 7666 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7667 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7668 if self._match_text_seq("STATISTICS"): 7669 return self.expression(exp.AnalyzeDelete, kind=kind) 7670 return None 7671 7672 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7673 if self._match_text_seq("CHAINED", "ROWS"): 7674 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7675 return None 7676 7677 # 
https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7678 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7679 this = self._prev.text.upper() 7680 expression: t.Optional[exp.Expression] = None 7681 expressions = [] 7682 update_options = None 7683 7684 if self._match_text_seq("HISTOGRAM", "ON"): 7685 expressions = self._parse_csv(self._parse_column_reference) 7686 with_expressions = [] 7687 while self._match(TokenType.WITH): 7688 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7689 if self._match_texts(("SYNC", "ASYNC")): 7690 if self._match_text_seq("MODE", advance=False): 7691 with_expressions.append(f"{self._prev.text.upper()} MODE") 7692 self._advance() 7693 else: 7694 buckets = self._parse_number() 7695 if self._match_text_seq("BUCKETS"): 7696 with_expressions.append(f"{buckets} BUCKETS") 7697 if with_expressions: 7698 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7699 7700 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7701 TokenType.UPDATE, advance=False 7702 ): 7703 update_options = self._prev.text.upper() 7704 self._advance() 7705 elif self._match_text_seq("USING", "DATA"): 7706 expression = self.expression(exp.UsingData, this=self._parse_string()) 7707 7708 return self.expression( 7709 exp.AnalyzeHistogram, 7710 this=this, 7711 expressions=expressions, 7712 expression=expression, 7713 update_options=update_options, 7714 ) 7715 7716 def _parse_merge(self) -> exp.Merge: 7717 self._match(TokenType.INTO) 7718 target = self._parse_table() 7719 7720 if target and self._match(TokenType.ALIAS, advance=False): 7721 target.set("alias", self._parse_table_alias()) 7722 7723 self._match(TokenType.USING) 7724 using = self._parse_table() 7725 7726 self._match(TokenType.ON) 7727 on = self._parse_assignment() 7728 7729 return self.expression( 7730 exp.Merge, 7731 this=target, 7732 using=using, 7733 on=on, 7734 whens=self._parse_when_matched(), 7735 
            # NOTE(review): tail of the preceding method (its start is outside this
            # chunk) — left untouched.
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        """Parses the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN ... clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None when unspecified
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                # e.g. DO NOTHING / other conflict actions
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parses SHOW via dialect-registered sub-parsers, falling back to a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parses a single SET item of the form `<name> = <value>` or `<name> TO <value>`."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not a valid assignment — rewind so the caller can try something else
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare identifiers on the right-hand side are treated as plain values
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parses SET [GLOBAL|SESSION] TRANSACTION <characteristic> [, ...]."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parses one SET item, dispatching to dialect-specific parsers when registered."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parses a SET statement; falls back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Unconsumed tokens remain — treat the whole statement as an opaque command
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parses a (possibly multi-word) keyword option from `options` into an exp.Var."""
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; continuations is None when the first word itself
            # is not a known option.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consumes all remaining tokens and wraps the raw SQL text in an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split off the leading keyword so Command(this=<keyword>, expression=<rest>)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parses a dictionary property, e.g. ClickHouse's SOURCE(...) / LAYOUT(...)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parses a dictionary range, e.g. RANGE(MIN x MAX y); MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parses a comprehension tail: `<expr> IN <iterator> [IF <condition>]`."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # index - 1 also rewinds past the token consumed before entry
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parses a heredoc string, e.g. $$text$$ or $tag$text$tag$."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # Named tag form: expect the closing "$" of the opening delimiter
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walks `trie` over upcoming tokens to find a multi-word keyword parser."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Returns True if the current token matches `token_type` (advancing by default)."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        """Returns True if the current token's type is in `types` (advancing by default)."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Returns True if the next two tokens match the given pair of types."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consumes a "(" or records/raises an error."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consumes a ")" or records/raises an error."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> t.Optional[bool]:
        """Returns True if the current token's upper-cased text is in `texts` (non-strings only)."""
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts: str, advance: bool = True) -> t.Optional[bool]:
        """Returns True if the upcoming tokens match `texts` in order; rewinds on failure."""
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Replaces lambda-parameter column references in `node`, casting typed parameters."""
        if not node:
            return node

        # Maps each lambda parameter name to its declared type (or False when untyped)
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost enclosing Dot chain, if any
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parses TRUNCATE [TABLE|DATABASE] with its optional identity/cascade clauses."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parses `<ordered expr> [WITH <operator>]` (e.g. index column options)."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parses a parenthesized, optionally `=`-prefixed, list of key/value options."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parses the option list of a COPY statement into CopyParameter expressions."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parses storage/credential clauses used by COPY (Snowflake, Redshift, etc.)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parses a file location argument of COPY; overridable per dialect."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parses COPY [INTO] ... FROM/TO ... with credentials and parameters."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY FROM (load), False for COPY TO (unload)
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parses NORMALIZE(<string> [, <form>])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        """Parses CEIL/FLOOR arguments, including the optional `TO <unit>` suffix."""
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parses `*` together with EXCEPT/EXCLUDE/REPLACE/RENAME or COLUMNS(...) unpacking."""
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parses a single privilege (possibly multi-word) with an optional column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parses a grantee, optionally prefixed by ROLE or GROUP."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parses GRANT <privileges> ON <securable> TO <principals>; falls back to Command."""
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parses OVERLAY(<expr> PLACING <expr> FROM <expr> [FOR <expr>])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        """Parses a FORMAT_NAME value as a string or dotted identifier."""
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        """Parses MAX_BY/MIN_BY style arguments: value, key and an optional count."""
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        """Builds an exp.Identifier from `token` (or the previous token), preserving positions."""
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        """Wraps `query` in a CTE and returns a new SELECT of `expressions` from it."""
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        # Hoist any existing WITH clause onto the new outer query
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        """Handles the `|> SELECT ...` pipe operator."""
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        """Handles `|> LIMIT ... [OFFSET ...]`; keeps the most restrictive limit, sums offsets."""
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        """Parses one `|> AGGREGATE` field with optional alias and ASC/DESC ordering."""
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        """Applies aggregate/group fields (and any ordering) from pipe syntax onto `query`."""
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    # Order by the alias, not the full aliased expression
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        """Handles `|> AGGREGATE ... [GROUP [AND ORDER] BY ...]`."""
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handles `|> UNION/EXCEPT/INTERSECT ...` with one or more parenthesized operands."""
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        # Detach the placeholder left side; the real left side is the piped query's CTE
        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handles `|> JOIN ...`."""
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        """Handles `|> PIVOT/UNPIVOT ...` by attaching pivots to the FROM clause."""
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        """Handles `|> EXTEND ...`, appending new projections after `*`."""
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        """Handles `|> TABLESAMPLE ...`, attaching the sample to the innermost CTE or query."""
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Repeatedly applies `|>` pipe operators to `query` until none remain."""
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree (one tree per SQL statement).
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1553 def __init__( 1554 self, 1555 error_level: t.Optional[ErrorLevel] = None, 1556 error_message_context: int = 100, 1557 max_errors: int = 3, 1558 dialect: DialectType = None, 1559 ): 1560 from sqlglot.dialects import Dialect 1561 1562 self.error_level = error_level or ErrorLevel.IMMEDIATE 1563 self.error_message_context = error_message_context 1564 self.max_errors = max_errors 1565 self.dialect = Dialect.get_or_raise(dialect) 1566 self.reset()
1579 def parse( 1580 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1581 ) -> t.List[t.Optional[exp.Expression]]: 1582 """ 1583 Parses a list of tokens and returns a list of syntax trees, one tree 1584 per parsed SQL statement. 1585 1586 Args: 1587 raw_tokens: The list of tokens. 1588 sql: The original SQL string, used to produce helpful debug messages. 1589 1590 Returns: 1591 The list of the produced syntax trees. 1592 """ 1593 return self._parse( 1594 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1595 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1597 def parse_into( 1598 self, 1599 expression_types: exp.IntoType, 1600 raw_tokens: t.List[Token], 1601 sql: t.Optional[str] = None, 1602 ) -> t.List[t.Optional[exp.Expression]]: 1603 """ 1604 Parses a list of tokens into a given Expression type. If a collection of Expression 1605 types is given instead, this method will try to parse the token list into each one 1606 of them, stopping at the first for which the parsing succeeds. 1607 1608 Args: 1609 expression_types: The expression type(s) to try and parse the token list into. 1610 raw_tokens: The list of tokens. 1611 sql: The original SQL string, used to produce helpful debug messages. 1612 1613 Returns: 1614 The target Expression. 1615 """ 1616 errors = [] 1617 for expression_type in ensure_list(expression_types): 1618 parser = self.EXPRESSION_PARSERS.get(expression_type) 1619 if not parser: 1620 raise TypeError(f"No parser registered for {expression_type}") 1621 1622 try: 1623 return self._parse(parser, raw_tokens, sql) 1624 except ParseError as e: 1625 e.errors[0]["into_expression"] = expression_type 1626 errors.append(e) 1627 1628 raise ParseError( 1629 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1630 errors=merge_errors(errors), 1631 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1671 def check_errors(self) -> None: 1672 """Logs or raises any found errors, depending on the chosen error level setting.""" 1673 if self.error_level == ErrorLevel.WARN: 1674 for error in self.errors: 1675 logger.error(str(error)) 1676 elif self.error_level == ErrorLevel.RAISE and self.errors: 1677 raise ParseError( 1678 concat_messages(self.errors, self.max_errors), 1679 errors=merge_errors(self.errors), 1680 )
Logs or raises any found errors, depending on the chosen error level setting.
1682 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1683 """ 1684 Appends an error in the list of recorded errors or raises it, depending on the chosen 1685 error level setting. 1686 """ 1687 token = token or self._curr or self._prev or Token.string("") 1688 start = token.start 1689 end = token.end + 1 1690 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1691 highlight = self.sql[start:end] 1692 end_context = self.sql[end : end + self.error_message_context] 1693 1694 error = ParseError.new( 1695 f"{message}. Line {token.line}, Col: {token.col}.\n" 1696 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1697 description=message, 1698 line=token.line, 1699 col=token.col, 1700 start_context=start_context, 1701 highlight=highlight, 1702 end_context=end_context, 1703 ) 1704 1705 if self.error_level == ErrorLevel.IMMEDIATE: 1706 raise error 1707 1708 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1710 def expression( 1711 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1712 ) -> E: 1713 """ 1714 Creates a new, validated Expression. 1715 1716 Args: 1717 exp_class: The expression class to instantiate. 1718 comments: An optional list of comments to attach to the expression. 1719 kwargs: The arguments to set for the expression along with their respective values. 1720 1721 Returns: 1722 The target expression. 1723 """ 1724 instance = exp_class(**kwargs) 1725 instance.add_comments(comments) if comments else self._add_comments(instance) 1726 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1733 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1734 """ 1735 Validates an Expression, making sure that all its mandatory arguments are set. 1736 1737 Args: 1738 expression: The expression to validate. 1739 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1740 1741 Returns: 1742 The validated expression. 1743 """ 1744 if self.error_level != ErrorLevel.IGNORE: 1745 for error_message in expression.error_messages(args): 1746 self.raise_error(error_message) 1747 1748 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parses a set operation (UNION/EXCEPT/INTERSECT) with `this` as its left operand,
        or returns None (rewinding) if the upcoming tokens are not a set operation."""
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            # No explicit qualifier: use the dialect default; None means the dialect
            # requires DISTINCT or ALL to be spelled out.
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )