sqlglot.parser
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))

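# Illustrative sketch (not part of the original module): build_var_map pairs up a
# flat key/value argument list, so a call like MAP('a', 1) builds roughly
#
#   build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#   # -> VarMap(keys=ARRAY('a'), values=ARRAY(1))
#
# while a lone star argument (MAP(*)) short-circuits into exp.StarMap.
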
def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)

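# Quick sanity check (illustrative; assumes the default dialect): the Paren
# wrapping in build_mod preserves precedence when MOD is rendered with the
# % operator:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("MOD(a + 1, 7)").sql()
#   '(a + 1) % 7'
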
def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )

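# Illustrative note (not in the original module): LOCATE/CHARINDEX take the
# substring first and the haystack second, so build_locate_strposition swaps
# them into exp.StrPosition's (this=haystack, substr=needle) layout, e.g.
# LOCATE('b', 'abc') -> StrPosition(this='abc', substr='b').
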
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass

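# Sketch of what the metaclass precomputes (assuming sqlglot.trie.new_trie's
# nested-dict representation, where the key 0 marks the end of a valid word
# sequence): multi-word parser keys are split so they can be matched token by
# token, e.g.
#
#   new_trie([["SET"], ["SET", "GLOBAL"]])
#   # -> {"SET": {0: True, "GLOBAL": {0: True}}}
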
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

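    # Illustrative sketch (not part of the original module): dialect parsers
    # typically extend this table by subclassing Parser and merging in their
    # own builders, along these lines (REVERSE_WORDS is hypothetical):
    #
    #   class MyParser(Parser):
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "REVERSE_WORDS": lambda args: exp.Anonymous(
    #               this="REVERSE_WORDS", expressions=args
    #           ),
    #       }
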
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

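    # Illustrative note (not in the original module): the tables above drive a
    # precedence-climbing parse, from loosest (DISJUNCTION/CONJUNCTION) down to
    # tightest (FACTOR/EXPONENT). For example, `1 + 2 * 3` parses as
    # Add(this=1, expression=Mul(this=2, expression=3)) because FACTOR binds
    # tighter than TERM.
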
    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

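    # Quick sanity check (illustrative; assumes the default dialect, where
    # STRICT_CAST is True): the DCOLON entry turns the `::` cast shorthand into
    # a regular exp.Cast:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("x::INT").sql()
    #   'CAST(x AS INT)'
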
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

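    # Illustrative note (not part of the original module): _parse_statement
    # (defined below) dispatches on the first token of each statement via
    # STATEMENT_PARSERS, producing one tree per semicolon-separated statement:
    #
    #   >>> import sqlglot
    #   >>> [e.key for e in sqlglot.parse("SELECT 1; DELETE FROM t")]
    #   ['select', 'delete']
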
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

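    # Illustrative note (not in the original module): the COLON entry only
    # yields a placeholder when the token right after ':' can act as an
    # identifier (COLON_PLACEHOLDER_TOKENS), so ':name' becomes
    # exp.Placeholder(this="name") while a bare ':' falls through to other rules.
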
self._parse_dict_property(this="SOURCE"), 1034 "STABLE": lambda self: self.expression( 1035 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1036 ), 1037 "STORED": lambda self: self._parse_stored(), 1038 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1039 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1040 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1041 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1042 "TO": lambda self: self._parse_to_table(), 1043 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1044 "TRANSFORM": lambda self: self.expression( 1045 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1046 ), 1047 "TTL": lambda self: self._parse_ttl(), 1048 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1049 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1050 "VOLATILE": lambda self: self._parse_volatile_property(), 1051 "WITH": lambda self: self._parse_with_property(), 1052 } 1053 1054 CONSTRAINT_PARSERS = { 1055 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1056 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1057 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1058 "CHARACTER SET": lambda self: self.expression( 1059 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1060 ), 1061 "CHECK": lambda self: self.expression( 1062 exp.CheckColumnConstraint, 1063 this=self._parse_wrapped(self._parse_assignment), 1064 enforced=self._match_text_seq("ENFORCED"), 1065 ), 1066 "COLLATE": lambda self: self.expression( 1067 exp.CollateColumnConstraint, 1068 this=self._parse_identifier() or self._parse_column(), 1069 ), 1070 "COMMENT": lambda self: self.expression( 1071 exp.CommentColumnConstraint, this=self._parse_string() 1072 ), 1073 "COMPRESS": lambda self: self._parse_compress(), 1074 "CLUSTERED": lambda self: self.expression( 1075 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1076 ), 1077 "NONCLUSTERED": lambda self: self.expression( 1078 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1079 ), 1080 "DEFAULT": lambda self: self.expression( 1081 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1082 ), 1083 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1084 "EPHEMERAL": lambda self: self.expression( 1085 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1086 ), 1087 "EXCLUDE": lambda self: self.expression( 1088 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1089 ), 1090 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1091 "FORMAT": lambda self: self.expression( 1092 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1093 ), 1094 "GENERATED": lambda self: self._parse_generated_as_identity(), 1095 "IDENTITY": lambda self: self._parse_auto_increment(), 1096 "INLINE": lambda self: self._parse_inline(), 1097 "LIKE": lambda self: self._parse_create_like(), 1098 "NOT": lambda self: self._parse_not_constraint(), 1099 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1100 "ON": lambda self: ( 1101 self._match(TokenType.UPDATE) 1102 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1103 ) 1104 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1105 "PATH": lambda self: 
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query:
        select = self._parse_select()
        if isinstance(select, exp.Select):
            return select.from_(query.subquery(copy=False), copy=False)
        return query

    def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query:
        where = self._parse_where()
        return query.where(where, copy=False)

    def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            query.limit(limit, copy=False)
        if offset:
            query.offset(offset, copy=False)
        return query

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized into the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

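    # Illustrative note (not part of the original module): given the Hive-style
    # spelling PARTITIONED BY (BUCKET(16, col)), the literal 16 arrives first,
    # so the swap above canonicalizes it to the Trino-style argument order,
    # PartitionedByBucket(this=col, expression=16).
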
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

( 1365 "FULL", 1366 "PARTIAL", 1367 "SIMPLE", 1368 ), 1369 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1370 "USING": ( 1371 "BTREE", 1372 "HASH", 1373 ), 1374 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1375 } 1376 1377 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1378 "NO": ("OTHERS",), 1379 "CURRENT": ("ROW",), 1380 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1381 } 1382 1383 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1384 1385 CLONE_KEYWORDS = {"CLONE", "COPY"} 1386 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1387 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1388 1389 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1390 1391 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1392 1393 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1394 1395 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1396 1397 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1398 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1399 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1400 1401 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1402 1403 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1404 1405 ADD_CONSTRAINT_TOKENS = { 1406 TokenType.CONSTRAINT, 1407 TokenType.FOREIGN_KEY, 1408 TokenType.INDEX, 1409 TokenType.KEY, 1410 TokenType.PRIMARY_KEY, 1411 TokenType.UNIQUE, 1412 } 1413 1414 DISTINCT_TOKENS = {TokenType.DISTINCT} 1415 1416 NULL_TOKENS = {TokenType.NULL} 1417 1418 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1419 1420 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1421 1422 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1423 1424 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1425 1426 ODBC_DATETIME_LITERALS = { 1427 "d": exp.Date, 1428 "t": exp.Time, 1429 "ts": exp.Timestamp, 1430 } 1431 1432 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1433 1434 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1435 1436 # The style options for the DESCRIBE statement 1437 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1438 1439 # The style options for the ANALYZE statement 1440 ANALYZE_STYLES = { 1441 "BUFFER_USAGE_LIMIT", 1442 "FULL", 1443 "LOCAL", 1444 "NO_WRITE_TO_BINLOG", 1445 "SAMPLE", 1446 "SKIP_LOCKED", 1447 "VERBOSE", 1448 } 1449 1450 ANALYZE_EXPRESSION_PARSERS = { 1451 "ALL": lambda self: self._parse_analyze_columns(), 1452 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1453 "DELETE": lambda self: self._parse_analyze_delete(), 1454 "DROP": lambda self: self._parse_analyze_histogram(), 1455 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1456 "LIST": lambda self: self._parse_analyze_list(), 1457 "PREDICATE": lambda self: self._parse_analyze_columns(), 1458 "UPDATE": lambda self: self._parse_analyze_histogram(), 1459 "VALIDATE": lambda self: self._parse_analyze_validate(), 1460 } 1461 1462 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1463 1464 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1465 1466 OPERATION_MODIFIERS: t.Set[str] = set() 1467 1468 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1469 1470 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1471 1472 STRICT_CAST = True 1473 1474 PREFIXED_PIVOT_COLUMNS = False 1475 IDENTIFY_PIVOT_STRINGS = False 1476 1477 
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

1583 """ 1584 return self._parse( 1585 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1586 ) 1587 1588 def parse_into( 1589 self, 1590 expression_types: exp.IntoType, 1591 raw_tokens: t.List[Token], 1592 sql: t.Optional[str] = None, 1593 ) -> t.List[t.Optional[exp.Expression]]: 1594 """ 1595 Parses a list of tokens into a given Expression type. If a collection of Expression 1596 types is given instead, this method will try to parse the token list into each one 1597 of them, stopping at the first for which the parsing succeeds. 1598 1599 Args: 1600 expression_types: The expression type(s) to try and parse the token list into. 1601 raw_tokens: The list of tokens. 1602 sql: The original SQL string, used to produce helpful debug messages. 1603 1604 Returns: 1605 The target Expression. 1606 """ 1607 errors = [] 1608 for expression_type in ensure_list(expression_types): 1609 parser = self.EXPRESSION_PARSERS.get(expression_type) 1610 if not parser: 1611 raise TypeError(f"No parser registered for {expression_type}") 1612 1613 try: 1614 return self._parse(parser, raw_tokens, sql) 1615 except ParseError as e: 1616 e.errors[0]["into_expression"] = expression_type 1617 errors.append(e) 1618 1619 raise ParseError( 1620 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1621 errors=merge_errors(errors), 1622 ) from errors[-1] 1623 1624 def _parse( 1625 self, 1626 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1627 raw_tokens: t.List[Token], 1628 sql: t.Optional[str] = None, 1629 ) -> t.List[t.Optional[exp.Expression]]: 1630 self.reset() 1631 self.sql = sql or "" 1632 1633 total = len(raw_tokens) 1634 chunks: t.List[t.List[Token]] = [[]] 1635 1636 for i, token in enumerate(raw_tokens): 1637 if token.token_type == TokenType.SEMICOLON: 1638 if token.comments: 1639 chunks.append([token]) 1640 1641 if i < total - 1: 1642 chunks.append([]) 1643 else: 1644 chunks[-1].append(token) 1645 1646 expressions = [] 1647 1648 for tokens in chunks: 1649 self._index = -1 1650 self._tokens = tokens 1651 self._advance() 1652 1653 expressions.append(parse_method(self)) 1654 1655 if self._index < len(self._tokens): 1656 self.raise_error("Invalid expression / Unexpected token") 1657 1658 self.check_errors() 1659 1660 return expressions 1661 1662 def check_errors(self) -> None: 1663 """Logs or raises any found errors, depending on the chosen error level setting.""" 1664 if self.error_level == ErrorLevel.WARN: 1665 for error in self.errors: 1666 logger.error(str(error)) 1667 elif self.error_level == ErrorLevel.RAISE and self.errors: 1668 raise ParseError( 1669 concat_messages(self.errors, self.max_errors), 1670 errors=merge_errors(self.errors), 1671 ) 1672 1673 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1674 """ 1675 Appends an error in the list of recorded errors or raises it, depending on the chosen 1676 error level setting. 1677 """ 1678 token = token or self._curr or self._prev or Token.string("") 1679 start = token.start 1680 end = token.end + 1 1681 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1682 highlight = self.sql[start:end] 1683 end_context = self.sql[end : end + self.error_message_context] 1684 1685 error = ParseError.new( 1686 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1687 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1688 description=message, 1689 line=token.line, 1690 col=token.col, 1691 start_context=start_context, 1692 highlight=highlight, 1693 end_context=end_context, 1694 ) 1695 1696 if self.error_level == ErrorLevel.IMMEDIATE: 1697 raise error 1698 1699 self.errors.append(error) 1700 1701 def expression( 1702 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1703 ) -> E: 1704 """ 1705 Creates a new, validated Expression. 1706 1707 Args: 1708 exp_class: The expression class to instantiate. 1709 comments: An optional list of comments to attach to the expression. 1710 kwargs: The arguments to set for the expression along with their respective values. 1711 1712 Returns: 1713 The target expression. 1714 """ 1715 instance = exp_class(**kwargs) 1716 instance.add_comments(comments) if comments else self._add_comments(instance) 1717 return self.validate_expression(instance) 1718 1719 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1720 if expression and self._prev_comments: 1721 expression.add_comments(self._prev_comments) 1722 self._prev_comments = None 1723 1724 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1725 """ 1726 Validates an Expression, making sure that all its mandatory arguments are set. 1727 1728 Args: 1729 expression: The expression to validate. 1730 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1731 1732 Returns: 1733 The validated expression. 1734 """ 1735 if self.error_level != ErrorLevel.IGNORE: 1736 for error_message in expression.error_messages(args): 1737 self.raise_error(error_message) 1738 1739 return expression 1740 1741 def _find_sql(self, start: Token, end: Token) -> str: 1742 return self.sql[start.start : end.end + 1] 1743 1744 def _is_connected(self) -> bool: 1745 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1746 1747 def _advance(self, times: int = 1) -> None: 1748 self._index += times 1749 self._curr = seq_get(self._tokens, self._index) 1750 self._next = seq_get(self._tokens, self._index + 1) 1751 1752 if self._index > 0: 1753 self._prev = self._tokens[self._index - 1] 1754 self._prev_comments = self._prev.comments 1755 else: 1756 self._prev = None 1757 self._prev_comments = None 1758 1759 def _retreat(self, index: int) -> None: 1760 if index != self._index: 1761 self._advance(index - self._index) 1762 1763 def _warn_unsupported(self) -> None: 1764 if len(self._tokens) <= 1: 1765 return 1766 1767 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1768 # interested in emitting a warning for the one currently being processed. 1769 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1770 1771 logger.warning( 1772 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1773 ) 1774 1775 def _parse_command(self) -> exp.Command: 1776 self._warn_unsupported() 1777 return self.expression( 1778 exp.Command, 1779 comments=self._prev_comments, 1780 this=self._prev.text.upper(), 1781 expression=self._parse_string(), 1782 ) 1783 1784 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1785 """ 1786 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1787 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1788 solve this by setting & resetting the parser state accordingly. 1789 """ 1790 index = self._index 1791 error_level = self.error_level 1792 1793 self.error_level = ErrorLevel.IMMEDIATE 1794 try: 1795 this = parse_method() 1796 except ParseError: 1797 this = None 1798 finally: 1799 if not this or retreat: 1800 self._retreat(index) 1801 self.error_level = error_level 1802 1803 return this 1804 1805 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1806 start = self._prev 1807 exists = self._parse_exists() if allow_exists else None 1808 1809 self._match(TokenType.ON) 1810 1811 materialized = self._match_text_seq("MATERIALIZED") 1812 kind = self._match_set(self.CREATABLES) and self._prev 1813 if not kind: 1814 return self._parse_as_command(start) 1815 1816 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1817 this = self._parse_user_defined_function(kind=kind.token_type) 1818 elif kind.token_type == TokenType.TABLE: 1819 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1820 elif kind.token_type == TokenType.COLUMN: 1821 this = self._parse_column() 1822 else: 1823 this = self._parse_id_var() 1824 1825 self._match(TokenType.IS) 1826 1827 return self.expression( 1828 exp.Comment, 1829 this=this, 1830 kind=kind.text, 1831 expression=self._parse_string(), 1832 exists=exists, 1833 materialized=materialized, 1834 ) 1835 1836 def _parse_to_table( 1837 self, 1838 ) -> exp.ToTableProperty: 1839 table = self._parse_table_parts(schema=True) 1840 return self.expression(exp.ToTableProperty, this=table) 1841 1842 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1843 def _parse_ttl(self) -> exp.Expression: 1844 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1845 this = self._parse_bitwise() 1846 1847 if self._match_text_seq("DELETE"): 1848 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1849 if self._match_text_seq("RECOMPRESS"): 1850 return self.expression( 1851 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1852 ) 1853 if self._match_text_seq("TO", "DISK"): 1854 return self.expression( 1855 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1856 ) 1857 if self._match_text_seq("TO", "VOLUME"): 1858 return self.expression( 1859 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1860 ) 1861 1862 return this 1863 1864 expressions = self._parse_csv(_parse_ttl_action) 1865 where = self._parse_where() 1866 group = self._parse_group() 1867 1868 aggregates = None 1869 if group and self._match(TokenType.SET): 1870 aggregates = self._parse_csv(self._parse_set_item) 1871 1872 return self.expression( 1873 exp.MergeTreeTTL, 1874 expressions=expressions, 1875 where=where, 1876 group=group, 1877 aggregates=aggregates, 1878 ) 1879 1880 def _parse_statement(self) -> t.Optional[exp.Expression]: 1881 if self._curr is None: 1882 return None 1883 1884 if self._match_set(self.STATEMENT_PARSERS): 1885 comments = self._prev_comments 1886 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1887 stmt.add_comments(comments, prepend=True) 1888 return stmt 1889 1890 if self._match_set(self.dialect.tokenizer.COMMANDS): 1891 return self._parse_command() 1892 1893 expression = self._parse_expression() 1894 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1895 return self._parse_query_modifiers(expression)
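    # Illustrative usage (a minimal sketch, not part of the original module): _parse_statement
    # above is what Parser.parse dispatches through via STATEMENT_PARSERS, while parse_into
    # constrains the result via EXPRESSION_PARSERS. Both are typically reached through
    # sqlglot's top-level helpers:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     # DROP is matched in STATEMENT_PARSERS and lands in _parse_drop below.
    #     ast = sqlglot.parse_one("DROP TABLE IF EXISTS t", read="postgres")
    #     assert isinstance(ast, exp.Drop) and ast.args["exists"]
    #
    #     # Constrained parsing: raises ParseError unless the SQL parses as a SELECT.
    #     select = sqlglot.parse_one("SELECT a FROM t", into=exp.Select)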
1896 1897 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1898 start = self._prev 1899 temporary = self._match(TokenType.TEMPORARY) 1900 materialized = self._match_text_seq("MATERIALIZED") 1901 1902 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1903 if not kind: 1904 return self._parse_as_command(start) 1905 1906 concurrently = self._match_text_seq("CONCURRENTLY") 1907 if_exists = exists or self._parse_exists() 1908 1909 if kind == "COLUMN": 1910 this = self._parse_column() 1911 else: 1912 this = self._parse_table_parts( 1913 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1914 ) 1915 1916 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1917 1918 if self._match(TokenType.L_PAREN, advance=False): 1919 expressions = self._parse_wrapped_csv(self._parse_types) 1920 else: 1921 expressions = None 1922 1923 return self.expression( 1924 exp.Drop, 1925 exists=if_exists, 1926 this=this, 1927 expressions=expressions, 1928 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1929 temporary=temporary, 1930 materialized=materialized, 1931 cascade=self._match_text_seq("CASCADE"), 1932 constraints=self._match_text_seq("CONSTRAINTS"), 1933 purge=self._match_text_seq("PURGE"), 1934 cluster=cluster, 1935 concurrently=concurrently, 1936 ) 1937 1938 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1939 return ( 1940 self._match_text_seq("IF") 1941 and (not not_ or self._match(TokenType.NOT)) 1942 and self._match(TokenType.EXISTS) 1943 ) 1944 1945 def _parse_create(self) -> exp.Create | exp.Command: 1946 # Note: this can't be None because we've matched a statement parser 1947 start = self._prev 1948 1949 replace = ( 1950 start.token_type == TokenType.REPLACE 1951 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1952 or self._match_pair(TokenType.OR, TokenType.ALTER) 1953 ) 1954 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1955 1956 unique = self._match(TokenType.UNIQUE) 1957 1958 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1959 clustered = True 1960 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1961 "COLUMNSTORE" 1962 ): 1963 clustered = False 1964 else: 1965 clustered = None 1966 1967 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1968 self._advance() 1969 1970 properties = None 1971 create_token = self._match_set(self.CREATABLES) and self._prev 1972 1973 if not create_token: 1974 # exp.Properties.Location.POST_CREATE 1975 properties = self._parse_properties() 1976 create_token = self._match_set(self.CREATABLES) and self._prev 1977 1978 if not properties or not create_token: 1979 return self._parse_as_command(start) 1980 1981 concurrently = self._match_text_seq("CONCURRENTLY") 1982 exists = self._parse_exists(not_=True) 1983 this = None 1984 expression: t.Optional[exp.Expression] = None 1985 indexes = None 1986 no_schema_binding = None 1987 begin = None 1988 end = None 1989 clone = None 1990 1991 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1992 nonlocal properties 1993 if properties and temp_props: 1994 properties.expressions.extend(temp_props.expressions) 1995 elif temp_props: 1996 properties = temp_props 1997 1998 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1999 this = self._parse_user_defined_function(kind=create_token.token_type) 2000 2001 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2002 
extend_props(self._parse_properties()) 2003 2004 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2005 extend_props(self._parse_properties()) 2006 2007 if not expression: 2008 if self._match(TokenType.COMMAND): 2009 expression = self._parse_as_command(self._prev) 2010 else: 2011 begin = self._match(TokenType.BEGIN) 2012 return_ = self._match_text_seq("RETURN") 2013 2014 if self._match(TokenType.STRING, advance=False): 2015 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2016 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2017 expression = self._parse_string() 2018 extend_props(self._parse_properties()) 2019 else: 2020 expression = self._parse_user_defined_function_expression() 2021 2022 end = self._match_text_seq("END") 2023 2024 if return_: 2025 expression = self.expression(exp.Return, this=expression) 2026 elif create_token.token_type == TokenType.INDEX: 2027 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2028 if not self._match(TokenType.ON): 2029 index = self._parse_id_var() 2030 anonymous = False 2031 else: 2032 index = None 2033 anonymous = True 2034 2035 this = self._parse_index(index=index, anonymous=anonymous) 2036 elif create_token.token_type in self.DB_CREATABLES: 2037 table_parts = self._parse_table_parts( 2038 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2039 ) 2040 2041 # exp.Properties.Location.POST_NAME 2042 self._match(TokenType.COMMA) 2043 extend_props(self._parse_properties(before=True)) 2044 2045 this = self._parse_schema(this=table_parts) 2046 2047 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2048 extend_props(self._parse_properties()) 2049 2050 has_alias = self._match(TokenType.ALIAS) 2051 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2052 # exp.Properties.Location.POST_ALIAS 2053 extend_props(self._parse_properties()) 2054 2055 if create_token.token_type == TokenType.SEQUENCE: 2056 expression = self._parse_types() 2057 extend_props(self._parse_properties()) 2058 else: 2059 expression = self._parse_ddl_select() 2060 2061 # Some dialects also support using a table as an alias instead of a SELECT. 2062 # Here we fall back to this as an alternative.
2063 if not expression and has_alias: 2064 expression = self._try_parse(self._parse_table_parts) 2065 2066 if create_token.token_type == TokenType.TABLE: 2067 # exp.Properties.Location.POST_EXPRESSION 2068 extend_props(self._parse_properties()) 2069 2070 indexes = [] 2071 while True: 2072 index = self._parse_index() 2073 2074 # exp.Properties.Location.POST_INDEX 2075 extend_props(self._parse_properties()) 2076 if not index: 2077 break 2078 else: 2079 self._match(TokenType.COMMA) 2080 indexes.append(index) 2081 elif create_token.token_type == TokenType.VIEW: 2082 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2083 no_schema_binding = True 2084 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2085 extend_props(self._parse_properties()) 2086 2087 shallow = self._match_text_seq("SHALLOW") 2088 2089 if self._match_texts(self.CLONE_KEYWORDS): 2090 copy = self._prev.text.lower() == "copy" 2091 clone = self.expression( 2092 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2093 ) 2094 2095 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2096 return self._parse_as_command(start) 2097 2098 create_kind_text = create_token.text.upper() 2099 return self.expression( 2100 exp.Create, 2101 this=this, 2102 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2103 replace=replace, 2104 refresh=refresh, 2105 unique=unique, 2106 expression=expression, 2107 exists=exists, 2108 properties=properties, 2109 indexes=indexes, 2110 no_schema_binding=no_schema_binding, 2111 begin=begin, 2112 end=end, 2113 clone=clone, 2114 concurrently=concurrently, 2115 clustered=clustered, 2116 ) 2117 2118 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2119 seq = exp.SequenceProperties() 2120 2121 options = [] 2122 index = self._index 2123 2124 while self._curr: 2125 self._match(TokenType.COMMA) 2126 if self._match_text_seq("INCREMENT"): 2127 self._match_text_seq("BY") 2128 self._match_text_seq("=") 2129 seq.set("increment", self._parse_term()) 2130 elif self._match_text_seq("MINVALUE"): 2131 seq.set("minvalue", self._parse_term()) 2132 elif self._match_text_seq("MAXVALUE"): 2133 seq.set("maxvalue", self._parse_term()) 2134 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2135 self._match_text_seq("=") 2136 seq.set("start", self._parse_term()) 2137 elif self._match_text_seq("CACHE"): 2138 # T-SQL allows empty CACHE which is initialized dynamically 2139 seq.set("cache", self._parse_number() or True) 2140 elif self._match_text_seq("OWNED", "BY"): 2141 # "OWNED BY NONE" is the default 2142 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2143 else: 2144 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2145 if opt: 2146 options.append(opt) 2147 else: 2148 break 2149 2150 seq.set("options", options if options else None) 2151 return None if self._index == index else seq 2152 2153 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2154 # only used for teradata currently 2155 self._match(TokenType.COMMA) 2156 2157 kwargs = { 2158 "no": self._match_text_seq("NO"), 2159 "dual": self._match_text_seq("DUAL"), 2160 "before": self._match_text_seq("BEFORE"), 2161 "default": self._match_text_seq("DEFAULT"), 2162 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2163 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2164 "after": self._match_text_seq("AFTER"), 2165 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2166 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2167 } 2168 2169 if self._match_texts(self.PROPERTY_PARSERS): 2170 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2171 try: 2172 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2173 except TypeError: 2174 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2175 2176 return None 2177 2178 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2179 return self._parse_wrapped_csv(self._parse_property) 2180 2181 def _parse_property(self) -> t.Optional[exp.Expression]: 2182 if self._match_texts(self.PROPERTY_PARSERS): 2183 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2184 2185 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2186 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2187 2188 if self._match_text_seq("COMPOUND", "SORTKEY"): 2189 return self._parse_sortkey(compound=True) 2190 2191 if self._match_text_seq("SQL", "SECURITY"): 2192 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2193 2194 index = self._index 2195 key = self._parse_column() 2196 2197 if not self._match(TokenType.EQ): 2198 self._retreat(index) 2199 return self._parse_sequence_properties() 2200 2201 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2202 if isinstance(key, exp.Column): 2203 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2204 2205 value = self._parse_bitwise() or self._parse_var(any_token=True) 2206 2207 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2208 if isinstance(value, exp.Column): 2209 value = exp.var(value.name) 2210 2211 return self.expression(exp.Property, this=key, value=value) 2212 2213 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2214 if self._match_text_seq("BY"): 2215 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2216 2217 self._match(TokenType.ALIAS) 2218 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2219 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2220 2221 return self.expression( 2222 exp.FileFormatProperty, 2223 this=( 2224 self.expression( 2225 exp.InputOutputFormat, 2226 input_format=input_format, 2227 output_format=output_format, 2228 ) 2229 if input_format or output_format 2230 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2231 ), 2232 ) 2233 2234 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2235 field = self._parse_field() 2236 if isinstance(field, exp.Identifier) and not field.quoted: 2237 field = exp.var(field) 2238 2239 return field 2240 2241 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2242 self._match(TokenType.EQ) 2243 self._match(TokenType.ALIAS) 2244 2245 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2246 2247 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2248 properties = [] 2249 while True: 2250 if before: 2251 prop = self._parse_property_before() 2252 else: 2253 prop = self._parse_property() 2254 if not prop: 2255 break 2256 for p in ensure_list(prop): 2257 properties.append(p) 2258 2259 if properties: 2260 return self.expression(exp.Properties, expressions=properties) 2261 2262 return None 2263 2264 
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2265 return self.expression( 2266 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2267 ) 2268 2269 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2270 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2271 security_specifier = self._prev.text.upper() 2272 return self.expression(exp.SecurityProperty, this=security_specifier) 2273 return None 2274 2275 def _parse_settings_property(self) -> exp.SettingsProperty: 2276 return self.expression( 2277 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2278 ) 2279 2280 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2281 if self._index >= 2: 2282 pre_volatile_token = self._tokens[self._index - 2] 2283 else: 2284 pre_volatile_token = None 2285 2286 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2287 return exp.VolatileProperty() 2288 2289 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2290 2291 def _parse_retention_period(self) -> exp.Var: 2292 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2293 number = self._parse_number() 2294 number_str = f"{number} " if number else "" 2295 unit = self._parse_var(any_token=True) 2296 return exp.var(f"{number_str}{unit}") 2297 2298 def _parse_system_versioning_property( 2299 self, with_: bool = False 2300 ) -> exp.WithSystemVersioningProperty: 2301 self._match(TokenType.EQ) 2302 prop = self.expression( 2303 exp.WithSystemVersioningProperty, 2304 **{ # type: ignore 2305 "on": True, 2306 "with": with_, 2307 }, 2308 ) 2309 2310 if self._match_text_seq("OFF"): 2311 prop.set("on", False) 2312 return prop 2313 2314 self._match(TokenType.ON) 2315 if self._match(TokenType.L_PAREN): 2316 while self._curr and not self._match(TokenType.R_PAREN): 2317 if self._match_text_seq("HISTORY_TABLE", "="): 2318 prop.set("this", self._parse_table_parts()) 2319 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2320 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2321 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2322 prop.set("retention_period", self._parse_retention_period()) 2323 2324 self._match(TokenType.COMMA) 2325 2326 return prop 2327 2328 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2329 self._match(TokenType.EQ) 2330 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2331 prop = self.expression(exp.DataDeletionProperty, on=on) 2332 2333 if self._match(TokenType.L_PAREN): 2334 while self._curr and not self._match(TokenType.R_PAREN): 2335 if self._match_text_seq("FILTER_COLUMN", "="): 2336 prop.set("filter_column", self._parse_column()) 2337 elif self._match_text_seq("RETENTION_PERIOD", "="): 2338 prop.set("retention_period", self._parse_retention_period()) 2339 2340 self._match(TokenType.COMMA) 2341 2342 return prop 2343 2344 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2345 kind = "HASH" 2346 expressions: t.Optional[t.List[exp.Expression]] = None 2347 if self._match_text_seq("BY", "HASH"): 2348 expressions = self._parse_wrapped_csv(self._parse_id_var) 2349 elif self._match_text_seq("BY", "RANDOM"): 2350 kind = "RANDOM" 2351 2352 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2353 buckets: t.Optional[exp.Expression] = None 2354 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2355 
buckets = self._parse_number() 2356 2357 return self.expression( 2358 exp.DistributedByProperty, 2359 expressions=expressions, 2360 kind=kind, 2361 buckets=buckets, 2362 order=self._parse_order(), 2363 ) 2364 2365 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2366 self._match_text_seq("KEY") 2367 expressions = self._parse_wrapped_id_vars() 2368 return self.expression(expr_type, expressions=expressions) 2369 2370 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2371 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2372 prop = self._parse_system_versioning_property(with_=True) 2373 self._match_r_paren() 2374 return prop 2375 2376 if self._match(TokenType.L_PAREN, advance=False): 2377 return self._parse_wrapped_properties() 2378 2379 if self._match_text_seq("JOURNAL"): 2380 return self._parse_withjournaltable() 2381 2382 if self._match_texts(self.VIEW_ATTRIBUTES): 2383 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2384 2385 if self._match_text_seq("DATA"): 2386 return self._parse_withdata(no=False) 2387 elif self._match_text_seq("NO", "DATA"): 2388 return self._parse_withdata(no=True) 2389 2390 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2391 return self._parse_serde_properties(with_=True) 2392 2393 if self._match(TokenType.SCHEMA): 2394 return self.expression( 2395 exp.WithSchemaBindingProperty, 2396 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2397 ) 2398 2399 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2400 return self.expression( 2401 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2402 ) 2403 2404 if not self._next: 2405 return None 2406 2407 return self._parse_withisolatedloading() 2408 2409 def _parse_procedure_option(self) -> exp.Expression | None: 2410 if self._match_text_seq("EXECUTE", "AS"): 2411 return self.expression( 2412 exp.ExecuteAsProperty, 2413 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2414 or self._parse_string(), 2415 ) 2416 2417 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2418 2419 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2420 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2421 self._match(TokenType.EQ) 2422 2423 user = self._parse_id_var() 2424 self._match(TokenType.PARAMETER) 2425 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2426 2427 if not user or not host: 2428 return None 2429 2430 return exp.DefinerProperty(this=f"{user}@{host}") 2431 2432 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2433 self._match(TokenType.TABLE) 2434 self._match(TokenType.EQ) 2435 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2436 2437 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2438 return self.expression(exp.LogProperty, no=no) 2439 2440 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2441 return self.expression(exp.JournalProperty, **kwargs) 2442 2443 def _parse_checksum(self) -> exp.ChecksumProperty: 2444 self._match(TokenType.EQ) 2445 2446 on = None 2447 if self._match(TokenType.ON): 2448 on = True 2449 elif self._match_text_seq("OFF"): 2450 on = False 2451 2452 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2453 2454 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2455 return self.expression( 2456 exp.Cluster, 2457 expressions=( 2458 
self._parse_wrapped_csv(self._parse_ordered) 2459 if wrapped 2460 else self._parse_csv(self._parse_ordered) 2461 ), 2462 ) 2463 2464 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2465 self._match_text_seq("BY") 2466 2467 self._match_l_paren() 2468 expressions = self._parse_csv(self._parse_column) 2469 self._match_r_paren() 2470 2471 if self._match_text_seq("SORTED", "BY"): 2472 self._match_l_paren() 2473 sorted_by = self._parse_csv(self._parse_ordered) 2474 self._match_r_paren() 2475 else: 2476 sorted_by = None 2477 2478 self._match(TokenType.INTO) 2479 buckets = self._parse_number() 2480 self._match_text_seq("BUCKETS") 2481 2482 return self.expression( 2483 exp.ClusteredByProperty, 2484 expressions=expressions, 2485 sorted_by=sorted_by, 2486 buckets=buckets, 2487 ) 2488 2489 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2490 if not self._match_text_seq("GRANTS"): 2491 self._retreat(self._index - 1) 2492 return None 2493 2494 return self.expression(exp.CopyGrantsProperty) 2495 2496 def _parse_freespace(self) -> exp.FreespaceProperty: 2497 self._match(TokenType.EQ) 2498 return self.expression( 2499 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2500 ) 2501 2502 def _parse_mergeblockratio( 2503 self, no: bool = False, default: bool = False 2504 ) -> exp.MergeBlockRatioProperty: 2505 if self._match(TokenType.EQ): 2506 return self.expression( 2507 exp.MergeBlockRatioProperty, 2508 this=self._parse_number(), 2509 percent=self._match(TokenType.PERCENT), 2510 ) 2511 2512 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2513 2514 def _parse_datablocksize( 2515 self, 2516 default: t.Optional[bool] = None, 2517 minimum: t.Optional[bool] = None, 2518 maximum: t.Optional[bool] = None, 2519 ) -> exp.DataBlocksizeProperty: 2520 self._match(TokenType.EQ) 2521 size = self._parse_number() 2522 2523 units = None 2524 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2525 units = self._prev.text 2526 2527 return self.expression( 2528 exp.DataBlocksizeProperty, 2529 size=size, 2530 units=units, 2531 default=default, 2532 minimum=minimum, 2533 maximum=maximum, 2534 ) 2535 2536 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2537 self._match(TokenType.EQ) 2538 always = self._match_text_seq("ALWAYS") 2539 manual = self._match_text_seq("MANUAL") 2540 never = self._match_text_seq("NEVER") 2541 default = self._match_text_seq("DEFAULT") 2542 2543 autotemp = None 2544 if self._match_text_seq("AUTOTEMP"): 2545 autotemp = self._parse_schema() 2546 2547 return self.expression( 2548 exp.BlockCompressionProperty, 2549 always=always, 2550 manual=manual, 2551 never=never, 2552 default=default, 2553 autotemp=autotemp, 2554 ) 2555 2556 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2557 index = self._index 2558 no = self._match_text_seq("NO") 2559 concurrent = self._match_text_seq("CONCURRENT") 2560 2561 if not self._match_text_seq("ISOLATED", "LOADING"): 2562 self._retreat(index) 2563 return None 2564 2565 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2566 return self.expression( 2567 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2568 ) 2569 2570 def _parse_locking(self) -> exp.LockingProperty: 2571 if self._match(TokenType.TABLE): 2572 kind = "TABLE" 2573 elif self._match(TokenType.VIEW): 2574 kind = "VIEW" 2575 elif self._match(TokenType.ROW): 2576 kind = "ROW" 2577 elif 
self._match_text_seq("DATABASE"): 2578 kind = "DATABASE" 2579 else: 2580 kind = None 2581 2582 if kind in ("DATABASE", "TABLE", "VIEW"): 2583 this = self._parse_table_parts() 2584 else: 2585 this = None 2586 2587 if self._match(TokenType.FOR): 2588 for_or_in = "FOR" 2589 elif self._match(TokenType.IN): 2590 for_or_in = "IN" 2591 else: 2592 for_or_in = None 2593 2594 if self._match_text_seq("ACCESS"): 2595 lock_type = "ACCESS" 2596 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2597 lock_type = "EXCLUSIVE" 2598 elif self._match_text_seq("SHARE"): 2599 lock_type = "SHARE" 2600 elif self._match_text_seq("READ"): 2601 lock_type = "READ" 2602 elif self._match_text_seq("WRITE"): 2603 lock_type = "WRITE" 2604 elif self._match_text_seq("CHECKSUM"): 2605 lock_type = "CHECKSUM" 2606 else: 2607 lock_type = None 2608 2609 override = self._match_text_seq("OVERRIDE") 2610 2611 return self.expression( 2612 exp.LockingProperty, 2613 this=this, 2614 kind=kind, 2615 for_or_in=for_or_in, 2616 lock_type=lock_type, 2617 override=override, 2618 ) 2619 2620 def _parse_partition_by(self) -> t.List[exp.Expression]: 2621 if self._match(TokenType.PARTITION_BY): 2622 return self._parse_csv(self._parse_assignment) 2623 return [] 2624 2625 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2626 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2627 if self._match_text_seq("MINVALUE"): 2628 return exp.var("MINVALUE") 2629 if self._match_text_seq("MAXVALUE"): 2630 return exp.var("MAXVALUE") 2631 return self._parse_bitwise() 2632 2633 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2634 expression = None 2635 from_expressions = None 2636 to_expressions = None 2637 2638 if self._match(TokenType.IN): 2639 this = self._parse_wrapped_csv(self._parse_bitwise) 2640 elif self._match(TokenType.FROM): 2641 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2642 self._match_text_seq("TO") 2643 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2644 elif self._match_text_seq("WITH", "(", "MODULUS"): 2645 this = self._parse_number() 2646 self._match_text_seq(",", "REMAINDER") 2647 expression = self._parse_number() 2648 self._match_r_paren() 2649 else: 2650 self.raise_error("Failed to parse partition bound spec.") 2651 2652 return self.expression( 2653 exp.PartitionBoundSpec, 2654 this=this, 2655 expression=expression, 2656 from_expressions=from_expressions, 2657 to_expressions=to_expressions, 2658 ) 2659 2660 # https://www.postgresql.org/docs/current/sql-createtable.html 2661 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2662 if not self._match_text_seq("OF"): 2663 self._retreat(self._index - 1) 2664 return None 2665 2666 this = self._parse_table(schema=True) 2667 2668 if self._match(TokenType.DEFAULT): 2669 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2670 elif self._match_text_seq("FOR", "VALUES"): 2671 expression = self._parse_partition_bound_spec() 2672 else: 2673 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2674 2675 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2676 2677 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2678 self._match(TokenType.EQ) 2679 return self.expression( 2680 exp.PartitionedByProperty, 2681 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2682 ) 2683 2684 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2685 if self._match_text_seq("AND", "STATISTICS"): 2686 
statistics = True 2687 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2688 statistics = False 2689 else: 2690 statistics = None 2691 2692 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2693 2694 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2695 if self._match_text_seq("SQL"): 2696 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2697 return None 2698 2699 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2700 if self._match_text_seq("SQL", "DATA"): 2701 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2702 return None 2703 2704 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2705 if self._match_text_seq("PRIMARY", "INDEX"): 2706 return exp.NoPrimaryIndexProperty() 2707 if self._match_text_seq("SQL"): 2708 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2709 return None 2710 2711 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2712 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2713 return exp.OnCommitProperty() 2714 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2715 return exp.OnCommitProperty(delete=True) 2716 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2717 2718 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2719 if self._match_text_seq("SQL", "DATA"): 2720 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2721 return None 2722 2723 def _parse_distkey(self) -> exp.DistKeyProperty: 2724 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2725 2726 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2727 table = self._parse_table(schema=True) 2728 2729 options = [] 2730 while self._match_texts(("INCLUDING", "EXCLUDING")): 2731 this = self._prev.text.upper() 2732 2733 id_var = self._parse_id_var() 2734 if not id_var: 2735 return None 2736 2737 options.append( 2738 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2739 ) 2740 2741 return self.expression(exp.LikeProperty, this=table, expressions=options) 2742 2743 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2744 return self.expression( 2745 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2746 ) 2747 2748 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2749 self._match(TokenType.EQ) 2750 return self.expression( 2751 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2752 ) 2753 2754 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2755 self._match_text_seq("WITH", "CONNECTION") 2756 return self.expression( 2757 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2758 ) 2759 2760 def _parse_returns(self) -> exp.ReturnsProperty: 2761 value: t.Optional[exp.Expression] 2762 null = None 2763 is_table = self._match(TokenType.TABLE) 2764 2765 if is_table: 2766 if self._match(TokenType.LT): 2767 value = self.expression( 2768 exp.Schema, 2769 this="TABLE", 2770 expressions=self._parse_csv(self._parse_struct_types), 2771 ) 2772 if not self._match(TokenType.GT): 2773 self.raise_error("Expecting >") 2774 else: 2775 value = self._parse_schema(exp.var("TABLE")) 2776 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2777 null = True 2778 value = None 2779 else: 2780 value = self._parse_types() 2781 2782 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2783 2784 def _parse_describe(self) -> exp.Describe: 2785 kind = self._match_set(self.CREATABLES) and self._prev.text 2786 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2787 if self._match(TokenType.DOT): 2788 style = None 2789 self._retreat(self._index - 2) 2790 2791 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2792 2793 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2794 this = self._parse_statement() 2795 else: 2796 this = self._parse_table(schema=True) 2797 2798 properties = self._parse_properties() 2799 expressions = properties.expressions if properties else None 2800 partition = self._parse_partition() 2801 return self.expression( 2802 exp.Describe, 2803 this=this, 2804 style=style, 2805 kind=kind, 2806 expressions=expressions, 2807 partition=partition, 2808 format=format, 2809 ) 2810 2811 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2812 kind = self._prev.text.upper() 2813 expressions = [] 2814 2815 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2816 if self._match(TokenType.WHEN): 2817 expression = self._parse_disjunction() 2818 self._match(TokenType.THEN) 2819 else: 2820 expression = None 2821 2822 else_ = self._match(TokenType.ELSE) 2823 2824 if not self._match(TokenType.INTO): 2825 return None 2826 2827 return self.expression( 2828 exp.ConditionalInsert, 2829 this=self.expression( 2830 exp.Insert, 2831 this=self._parse_table(schema=True), 2832 expression=self._parse_derived_table_values(), 2833 ), 2834 expression=expression, 2835 else_=else_, 2836 ) 2837 2838 expression = parse_conditional_insert() 2839 while expression is not None: 2840 expressions.append(expression) 2841 expression = parse_conditional_insert() 2842 2843 return self.expression( 2844 exp.MultitableInserts, 2845 kind=kind, 2846 comments=comments, 2847 expressions=expressions, 2848 source=self._parse_table(), 2849 ) 2850 2851 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2852 comments = [] 2853 hint = self._parse_hint() 2854 overwrite = self._match(TokenType.OVERWRITE) 2855 ignore = self._match(TokenType.IGNORE) 2856 local = self._match_text_seq("LOCAL") 2857 alternative = None 2858 is_function = None 2859 2860 if self._match_text_seq("DIRECTORY"): 2861 this: t.Optional[exp.Expression] = self.expression( 2862 exp.Directory, 2863 this=self._parse_var_or_string(), 2864 local=local, 2865 row_format=self._parse_row_format(match_row=True), 2866 ) 2867 else: 2868 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2869 comments += ensure_list(self._prev_comments) 2870 return self._parse_multitable_inserts(comments) 2871 2872 if self._match(TokenType.OR): 2873 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2874 2875 self._match(TokenType.INTO) 2876 comments += ensure_list(self._prev_comments) 2877 self._match(TokenType.TABLE) 2878 is_function = self._match(TokenType.FUNCTION) 2879 2880 this = ( 2881 self._parse_table(schema=True, parse_partition=True) 2882 if not is_function 2883 else self._parse_function() 2884 ) 2885 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2886 this.set("alias", self._parse_table_alias()) 2887 2888 returning = self._parse_returning() 2889 2890 return self.expression( 2891 exp.Insert, 2892 comments=comments, 2893 hint=hint, 2894 is_function=is_function, 2895 this=this, 
2896 stored=self._match_text_seq("STORED") and self._parse_stored(), 2897 by_name=self._match_text_seq("BY", "NAME"), 2898 exists=self._parse_exists(), 2899 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2900 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2901 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2902 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2903 conflict=self._parse_on_conflict(), 2904 returning=returning or self._parse_returning(), 2905 overwrite=overwrite, 2906 alternative=alternative, 2907 ignore=ignore, 2908 source=self._match(TokenType.TABLE) and self._parse_table(), 2909 ) 2910 2911 def _parse_kill(self) -> exp.Kill: 2912 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2913 2914 return self.expression( 2915 exp.Kill, 2916 this=self._parse_primary(), 2917 kind=kind, 2918 ) 2919 2920 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2921 conflict = self._match_text_seq("ON", "CONFLICT") 2922 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2923 2924 if not conflict and not duplicate: 2925 return None 2926 2927 conflict_keys = None 2928 constraint = None 2929 2930 if conflict: 2931 if self._match_text_seq("ON", "CONSTRAINT"): 2932 constraint = self._parse_id_var() 2933 elif self._match(TokenType.L_PAREN): 2934 conflict_keys = self._parse_csv(self._parse_id_var) 2935 self._match_r_paren() 2936 2937 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2938 if self._prev.token_type == TokenType.UPDATE: 2939 self._match(TokenType.SET) 2940 expressions = self._parse_csv(self._parse_equality) 2941 else: 2942 expressions = None 2943 2944 return self.expression( 2945 exp.OnConflict, 2946 duplicate=duplicate, 2947 expressions=expressions, 2948 action=action, 2949 conflict_keys=conflict_keys, 2950 constraint=constraint, 2951 where=self._parse_where(), 2952 ) 2953 2954 def _parse_returning(self) -> t.Optional[exp.Returning]: 2955 if not self._match(TokenType.RETURNING): 2956 return None 2957 return self.expression( 2958 exp.Returning, 2959 expressions=self._parse_csv(self._parse_expression), 2960 into=self._match(TokenType.INTO) and self._parse_table_part(), 2961 ) 2962 2963 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2964 if not self._match(TokenType.FORMAT): 2965 return None 2966 return self._parse_row_format() 2967 2968 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2969 index = self._index 2970 with_ = with_ or self._match_text_seq("WITH") 2971 2972 if not self._match(TokenType.SERDE_PROPERTIES): 2973 self._retreat(index) 2974 return None 2975 return self.expression( 2976 exp.SerdeProperties, 2977 **{ # type: ignore 2978 "expressions": self._parse_wrapped_properties(), 2979 "with": with_, 2980 }, 2981 ) 2982 2983 def _parse_row_format( 2984 self, match_row: bool = False 2985 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2986 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2987 return None 2988 2989 if self._match_text_seq("SERDE"): 2990 this = self._parse_string() 2991 2992 serde_properties = self._parse_serde_properties() 2993 2994 return self.expression( 2995 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2996 ) 2997 2998 self._match_text_seq("DELIMITED") 2999 3000 kwargs = {} 3001 3002 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3003 kwargs["fields"] = self._parse_string() 3004 if self._match_text_seq("ESCAPED", "BY"): 3005 kwargs["escaped"] = self._parse_string() 3006 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3007 kwargs["collection_items"] = self._parse_string() 3008 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3009 kwargs["map_keys"] = self._parse_string() 3010 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3011 kwargs["lines"] = self._parse_string() 3012 if self._match_text_seq("NULL", "DEFINED", "AS"): 3013 kwargs["null"] = self._parse_string() 3014 3015 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3016 3017 def _parse_load(self) -> exp.LoadData | exp.Command: 3018 if self._match_text_seq("DATA"): 3019 local = self._match_text_seq("LOCAL") 3020 self._match_text_seq("INPATH") 3021 inpath = self._parse_string() 3022 overwrite = self._match(TokenType.OVERWRITE) 3023 self._match_pair(TokenType.INTO, TokenType.TABLE) 3024 3025 return self.expression( 3026 exp.LoadData, 3027 this=self._parse_table(schema=True), 3028 local=local, 3029 overwrite=overwrite, 3030 inpath=inpath, 3031 partition=self._parse_partition(), 3032 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3033 serde=self._match_text_seq("SERDE") and self._parse_string(), 3034 ) 3035 return self._parse_as_command(self._prev) 3036 3037 def _parse_delete(self) -> exp.Delete: 3038 # This handles MySQL's "Multiple-Table Syntax" 3039 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3040 tables = None 3041 if not self._match(TokenType.FROM, advance=False): 3042 tables = self._parse_csv(self._parse_table) or None 3043 3044 returning = self._parse_returning() 3045 3046 return self.expression( 3047 exp.Delete, 3048 tables=tables, 3049 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3050 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3051 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3052 where=self._parse_where(), 3053 returning=returning or self._parse_returning(), 3054 limit=self._parse_limit(), 3055 ) 3056 3057 def _parse_update(self) -> exp.Update: 3058 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3059 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3060 returning = self._parse_returning() 3061 return self.expression( 3062 exp.Update, 3063 **{ # type: ignore 3064 "this": this, 3065 "expressions": expressions, 3066 "from": self._parse_from(joins=True), 3067 "where": self._parse_where(), 3068 "returning": returning or self._parse_returning(), 3069 "order": self._parse_order(), 3070 "limit": self._parse_limit(), 3071 }, 3072 ) 3073 3074 def _parse_use(self) -> exp.Use: 3075 return self.expression( 3076 exp.Use, 3077 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3078 this=self._parse_table(schema=False), 3079 ) 3080 3081 def _parse_uncache(self) -> exp.Uncache: 3082 if not self._match(TokenType.TABLE): 3083 self.raise_error("Expecting TABLE after UNCACHE") 3084 3085 return self.expression( 3086 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3087 ) 3088 3089 def _parse_cache(self) -> exp.Cache: 3090 lazy = self._match_text_seq("LAZY") 3091 self._match(TokenType.TABLE) 3092 table = self._parse_table(schema=True) 3093 3094 options = [] 3095 if self._match_text_seq("OPTIONS"): 3096 self._match_l_paren() 3097 k = 
self._parse_string() 3098 self._match(TokenType.EQ) 3099 v = self._parse_string() 3100 options = [k, v] 3101 self._match_r_paren() 3102 3103 self._match(TokenType.ALIAS) 3104 return self.expression( 3105 exp.Cache, 3106 this=table, 3107 lazy=lazy, 3108 options=options, 3109 expression=self._parse_select(nested=True), 3110 ) 3111 3112 def _parse_partition(self) -> t.Optional[exp.Partition]: 3113 if not self._match_texts(self.PARTITION_KEYWORDS): 3114 return None 3115 3116 return self.expression( 3117 exp.Partition, 3118 subpartition=self._prev.text.upper() == "SUBPARTITION", 3119 expressions=self._parse_wrapped_csv(self._parse_assignment), 3120 ) 3121 3122 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3123 def _parse_value_expression() -> t.Optional[exp.Expression]: 3124 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3125 return exp.var(self._prev.text.upper()) 3126 return self._parse_expression() 3127 3128 if self._match(TokenType.L_PAREN): 3129 expressions = self._parse_csv(_parse_value_expression) 3130 self._match_r_paren() 3131 return self.expression(exp.Tuple, expressions=expressions) 3132 3133 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3134 expression = self._parse_expression() 3135 if expression: 3136 return self.expression(exp.Tuple, expressions=[expression]) 3137 return None 3138 3139 def _parse_projections(self) -> t.List[exp.Expression]: 3140 return self._parse_expressions() 3141 3142 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3143 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3144 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3145 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3146 ) 3147 elif self._match(TokenType.FROM): 3148 from_ = self._parse_from(skip_from_token=True) 3149 # Support parentheses for duckdb FROM-first syntax 3150 select = self._parse_select() 3151 if select: 3152 select.set("from", from_) 3153 this = select 3154 else: 3155 this = exp.select("*").from_(t.cast(exp.From, from_)) 3156 else: 3157 this = ( 3158 self._parse_table() 3159 if table 3160 else self._parse_select(nested=True, parse_set_operation=False) 3161 ) 3162 3163 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3164 # in case a modifier (e.g. 
join) is following 3165 if table and isinstance(this, exp.Values) and this.alias: 3166 alias = this.args["alias"].pop() 3167 this = exp.Table(this=this, alias=alias) 3168 3169 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3170 3171 return this 3172 3173 def _parse_select( 3174 self, 3175 nested: bool = False, 3176 table: bool = False, 3177 parse_subquery_alias: bool = True, 3178 parse_set_operation: bool = True, 3179 ) -> t.Optional[exp.Expression]: 3180 cte = self._parse_with() 3181 3182 if cte: 3183 this = self._parse_statement() 3184 3185 if not this: 3186 self.raise_error("Failed to parse any statement following CTE") 3187 return cte 3188 3189 if "with" in this.arg_types: 3190 this.set("with", cte) 3191 else: 3192 self.raise_error(f"{this.key} does not support CTE") 3193 this = cte 3194 3195 return this 3196 3197 # duckdb supports leading with FROM x 3198 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3199 3200 if self._match(TokenType.SELECT): 3201 comments = self._prev_comments 3202 3203 hint = self._parse_hint() 3204 3205 if self._next and not self._next.token_type == TokenType.DOT: 3206 all_ = self._match(TokenType.ALL) 3207 distinct = self._match_set(self.DISTINCT_TOKENS) 3208 else: 3209 all_, distinct = None, None 3210 3211 kind = ( 3212 self._match(TokenType.ALIAS) 3213 and self._match_texts(("STRUCT", "VALUE")) 3214 and self._prev.text.upper() 3215 ) 3216 3217 if distinct: 3218 distinct = self.expression( 3219 exp.Distinct, 3220 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3221 ) 3222 3223 if all_ and distinct: 3224 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3225 3226 operation_modifiers = [] 3227 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3228 operation_modifiers.append(exp.var(self._prev.text.upper())) 3229 3230 limit = self._parse_limit(top=True) 3231 projections = self._parse_projections() 3232 3233 this = self.expression( 3234 exp.Select, 3235 kind=kind, 3236 hint=hint, 3237 distinct=distinct, 3238 expressions=projections, 3239 limit=limit, 3240 operation_modifiers=operation_modifiers or None, 3241 ) 3242 this.comments = comments 3243 3244 into = self._parse_into() 3245 if into: 3246 this.set("into", into) 3247 3248 if not from_: 3249 from_ = self._parse_from() 3250 3251 if from_: 3252 this.set("from", from_) 3253 3254 this = self._parse_query_modifiers(this) 3255 elif (table or nested) and self._match(TokenType.L_PAREN): 3256 this = self._parse_wrapped_select(table=table) 3257 3258 # We return early here so that the UNION isn't attached to the subquery by the 3259 # following call to _parse_set_operations, but instead becomes the parent node 3260 self._match_r_paren() 3261 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3262 elif self._match(TokenType.VALUES, advance=False): 3263 this = self._parse_derived_table_values() 3264 elif from_: 3265 this = exp.select("*").from_(from_.this, copy=False) 3266 if self._match(TokenType.PIPE_GT, advance=False): 3267 return self._parse_pipe_syntax_query(this) 3268 elif self._match(TokenType.SUMMARIZE): 3269 table = self._match(TokenType.TABLE) 3270 this = self._parse_select() or self._parse_string() or self._parse_table() 3271 return self.expression(exp.Summarize, this=this, table=table) 3272 elif self._match(TokenType.DESCRIBE): 3273 this = self._parse_describe() 3274 elif self._match_text_seq("STREAM"): 3275 this = self._parse_function() 3276 if this: 3277 this = 
self.expression(exp.Stream, this=this) 3278 else: 3279 self._retreat(self._index - 1) 3280 else: 3281 this = None 3282 3283 return self._parse_set_operations(this) if parse_set_operation else this 3284 3285 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3286 self._match_text_seq("SEARCH") 3287 3288 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3289 3290 if not kind: 3291 return None 3292 3293 self._match_text_seq("FIRST", "BY") 3294 3295 return self.expression( 3296 exp.RecursiveWithSearch, 3297 kind=kind, 3298 this=self._parse_id_var(), 3299 expression=self._match_text_seq("SET") and self._parse_id_var(), 3300 using=self._match_text_seq("USING") and self._parse_id_var(), 3301 ) 3302 3303 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3304 if not skip_with_token and not self._match(TokenType.WITH): 3305 return None 3306 3307 comments = self._prev_comments 3308 recursive = self._match(TokenType.RECURSIVE) 3309 3310 last_comments = None 3311 expressions = [] 3312 while True: 3313 cte = self._parse_cte() 3314 if isinstance(cte, exp.CTE): 3315 expressions.append(cte) 3316 if last_comments: 3317 cte.add_comments(last_comments) 3318 3319 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3320 break 3321 else: 3322 self._match(TokenType.WITH) 3323 3324 last_comments = self._prev_comments 3325 3326 return self.expression( 3327 exp.With, 3328 comments=comments, 3329 expressions=expressions, 3330 recursive=recursive, 3331 search=self._parse_recursive_with_search(), 3332 ) 3333 3334 def _parse_cte(self) -> t.Optional[exp.CTE]: 3335 index = self._index 3336 3337 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3338 if not alias or not alias.this: 3339 self.raise_error("Expected CTE to have alias") 3340 3341 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3342 self._retreat(index) 3343 return None 3344 3345 comments = self._prev_comments 3346 3347 if self._match_text_seq("NOT", "MATERIALIZED"): 3348 materialized = False 3349 elif self._match_text_seq("MATERIALIZED"): 3350 materialized = True 3351 else: 3352 materialized = None 3353 3354 cte = self.expression( 3355 exp.CTE, 3356 this=self._parse_wrapped(self._parse_statement), 3357 alias=alias, 3358 materialized=materialized, 3359 comments=comments, 3360 ) 3361 3362 if isinstance(cte.this, exp.Values): 3363 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3364 3365 return cte 3366 3367 def _parse_table_alias( 3368 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3369 ) -> t.Optional[exp.TableAlias]: 3370 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3371 # so this section tries to parse the clause version and if it fails, it treats the token 3372 # as an identifier (alias) 3373 if self._can_parse_limit_or_offset(): 3374 return None 3375 3376 any_token = self._match(TokenType.ALIAS) 3377 alias = ( 3378 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3379 or self._parse_string_as_identifier() 3380 ) 3381 3382 index = self._index 3383 if self._match(TokenType.L_PAREN): 3384 columns = self._parse_csv(self._parse_function_parameter) 3385 self._match_r_paren() if columns else self._retreat(index) 3386 else: 3387 columns = None 3388 3389 if not alias and not columns: 3390 return None 3391 3392 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3393 3394 # 
We bubble up comments from the Identifier to the TableAlias 3395 if isinstance(alias, exp.Identifier): 3396 table_alias.add_comments(alias.pop_comments()) 3397 3398 return table_alias 3399 3400 def _parse_subquery( 3401 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3402 ) -> t.Optional[exp.Subquery]: 3403 if not this: 3404 return None 3405 3406 return self.expression( 3407 exp.Subquery, 3408 this=this, 3409 pivots=self._parse_pivots(), 3410 alias=self._parse_table_alias() if parse_alias else None, 3411 sample=self._parse_table_sample(), 3412 ) 3413 3414 def _implicit_unnests_to_explicit(self, this: E) -> E: 3415 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3416 3417 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3418 for i, join in enumerate(this.args.get("joins") or []): 3419 table = join.this 3420 normalized_table = table.copy() 3421 normalized_table.meta["maybe_column"] = True 3422 normalized_table = _norm(normalized_table, dialect=self.dialect) 3423 3424 if isinstance(table, exp.Table) and not join.args.get("on"): 3425 if normalized_table.parts[0].name in refs: 3426 table_as_column = table.to_column() 3427 unnest = exp.Unnest(expressions=[table_as_column]) 3428 3429 # Table.to_column creates a parent Alias node that we want to convert to 3430 # a TableAlias and attach to the Unnest, so it matches the parser's output 3431 if isinstance(table.args.get("alias"), exp.TableAlias): 3432 table_as_column.replace(table_as_column.this) 3433 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3434 3435 table.replace(unnest) 3436 3437 refs.add(normalized_table.alias_or_name) 3438 3439 return this 3440 3441 def _parse_query_modifiers( 3442 self, this: t.Optional[exp.Expression] 3443 ) -> t.Optional[exp.Expression]: 3444 if isinstance(this, self.MODIFIABLES): 3445 for join in self._parse_joins(): 3446 this.append("joins", join) 3447 for lateral in iter(self._parse_lateral, None): 3448 this.append("laterals", lateral) 3449 3450 while True: 3451 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3452 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3453 key, expression = parser(self) 3454 3455 if expression: 3456 this.set(key, expression) 3457 if key == "limit": 3458 offset = expression.args.pop("offset", None) 3459 3460 if offset: 3461 offset = exp.Offset(expression=offset) 3462 this.set("offset", offset) 3463 3464 limit_by_expressions = expression.expressions 3465 expression.set("expressions", None) 3466 offset.set("expressions", limit_by_expressions) 3467 continue 3468 break 3469 3470 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3471 this = self._implicit_unnests_to_explicit(this) 3472 3473 return this 3474 3475 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3476 start = self._curr 3477 while self._curr: 3478 self._advance() 3479 3480 end = self._tokens[self._index - 1] 3481 return exp.Hint(expressions=[self._find_sql(start, end)]) 3482 3483 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3484 return self._parse_function_call() 3485 3486 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3487 start_index = self._index 3488 should_fallback_to_string = False 3489 3490 hints = [] 3491 try: 3492 for hint in iter( 3493 lambda: self._parse_csv( 3494 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3495 ), 3496 [], 3497 ): 3498 hints.extend(hint) 3499 except ParseError: 3500 
should_fallback_to_string = True 3501 3502 if should_fallback_to_string or self._curr: 3503 self._retreat(start_index) 3504 return self._parse_hint_fallback_to_string() 3505 3506 return self.expression(exp.Hint, expressions=hints) 3507 3508 def _parse_hint(self) -> t.Optional[exp.Hint]: 3509 if self._match(TokenType.HINT) and self._prev_comments: 3510 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3511 3512 return None 3513 3514 def _parse_into(self) -> t.Optional[exp.Into]: 3515 if not self._match(TokenType.INTO): 3516 return None 3517 3518 temp = self._match(TokenType.TEMPORARY) 3519 unlogged = self._match_text_seq("UNLOGGED") 3520 self._match(TokenType.TABLE) 3521 3522 return self.expression( 3523 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3524 ) 3525 3526 def _parse_from( 3527 self, joins: bool = False, skip_from_token: bool = False 3528 ) -> t.Optional[exp.From]: 3529 if not skip_from_token and not self._match(TokenType.FROM): 3530 return None 3531 3532 return self.expression( 3533 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3534 ) 3535 3536 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3537 return self.expression( 3538 exp.MatchRecognizeMeasure, 3539 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3540 this=self._parse_expression(), 3541 ) 3542 3543 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3544 if not self._match(TokenType.MATCH_RECOGNIZE): 3545 return None 3546 3547 self._match_l_paren() 3548 3549 partition = self._parse_partition_by() 3550 order = self._parse_order() 3551 3552 measures = ( 3553 self._parse_csv(self._parse_match_recognize_measure) 3554 if self._match_text_seq("MEASURES") 3555 else None 3556 ) 3557 3558 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3559 rows = exp.var("ONE ROW PER MATCH") 3560 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3561 text = "ALL ROWS PER MATCH" 3562 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3563 text += " SHOW EMPTY MATCHES" 3564 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3565 text += " OMIT EMPTY MATCHES" 3566 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3567 text += " WITH UNMATCHED ROWS" 3568 rows = exp.var(text) 3569 else: 3570 rows = None 3571 3572 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3573 text = "AFTER MATCH SKIP" 3574 if self._match_text_seq("PAST", "LAST", "ROW"): 3575 text += " PAST LAST ROW" 3576 elif self._match_text_seq("TO", "NEXT", "ROW"): 3577 text += " TO NEXT ROW" 3578 elif self._match_text_seq("TO", "FIRST"): 3579 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3580 elif self._match_text_seq("TO", "LAST"): 3581 text += f" TO LAST {self._advance_any().text}" # type: ignore 3582 after = exp.var(text) 3583 else: 3584 after = None 3585 3586 if self._match_text_seq("PATTERN"): 3587 self._match_l_paren() 3588 3589 if not self._curr: 3590 self.raise_error("Expecting )", self._curr) 3591 3592 paren = 1 3593 start = self._curr 3594 3595 while self._curr and paren > 0: 3596 if self._curr.token_type == TokenType.L_PAREN: 3597 paren += 1 3598 if self._curr.token_type == TokenType.R_PAREN: 3599 paren -= 1 3600 3601 end = self._prev 3602 self._advance() 3603 3604 if paren > 0: 3605 self.raise_error("Expecting )", self._curr) 3606 3607 pattern = exp.var(self._find_sql(start, end)) 3608 else: 3609 pattern = None 3610 3611 define = ( 3612 
self._parse_csv(self._parse_name_as_expression) 3613 if self._match_text_seq("DEFINE") 3614 else None 3615 ) 3616 3617 self._match_r_paren() 3618 3619 return self.expression( 3620 exp.MatchRecognize, 3621 partition_by=partition, 3622 order=order, 3623 measures=measures, 3624 rows=rows, 3625 after=after, 3626 pattern=pattern, 3627 define=define, 3628 alias=self._parse_table_alias(), 3629 ) 3630 3631 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3632 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3633 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3634 cross_apply = False 3635 3636 if cross_apply is not None: 3637 this = self._parse_select(table=True) 3638 view = None 3639 outer = None 3640 elif self._match(TokenType.LATERAL): 3641 this = self._parse_select(table=True) 3642 view = self._match(TokenType.VIEW) 3643 outer = self._match(TokenType.OUTER) 3644 else: 3645 return None 3646 3647 if not this: 3648 this = ( 3649 self._parse_unnest() 3650 or self._parse_function() 3651 or self._parse_id_var(any_token=False) 3652 ) 3653 3654 while self._match(TokenType.DOT): 3655 this = exp.Dot( 3656 this=this, 3657 expression=self._parse_function() or self._parse_id_var(any_token=False), 3658 ) 3659 3660 ordinality: t.Optional[bool] = None 3661 3662 if view: 3663 table = self._parse_id_var(any_token=False) 3664 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3665 table_alias: t.Optional[exp.TableAlias] = self.expression( 3666 exp.TableAlias, this=table, columns=columns 3667 ) 3668 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3669 # We move the alias from the lateral's child node to the lateral itself 3670 table_alias = this.args["alias"].pop() 3671 else: 3672 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3673 table_alias = self._parse_table_alias() 3674 3675 return self.expression( 3676 exp.Lateral, 3677 this=this, 3678 view=view, 3679 outer=outer, 3680 alias=table_alias, 3681 cross_apply=cross_apply, 3682 ordinality=ordinality, 3683 ) 3684 3685 def _parse_join_parts( 3686 self, 3687 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3688 return ( 3689 self._match_set(self.JOIN_METHODS) and self._prev, 3690 self._match_set(self.JOIN_SIDES) and self._prev, 3691 self._match_set(self.JOIN_KINDS) and self._prev, 3692 ) 3693 3694 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3695 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3696 this = self._parse_column() 3697 if isinstance(this, exp.Column): 3698 return this.this 3699 return this 3700 3701 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3702 3703 def _parse_join( 3704 self, skip_join_token: bool = False, parse_bracket: bool = False 3705 ) -> t.Optional[exp.Join]: 3706 if self._match(TokenType.COMMA): 3707 table = self._try_parse(self._parse_table) 3708 if table: 3709 return self.expression(exp.Join, this=table) 3710 return None 3711 3712 index = self._index 3713 method, side, kind = self._parse_join_parts() 3714 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3715 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3716 3717 if not skip_join_token and not join: 3718 self._retreat(index) 3719 kind = None 3720 method = None 3721 side = None 3722 3723 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3724 cross_apply = self._match_pair(TokenType.CROSS, 
TokenType.APPLY, False) 3725 3726 if not skip_join_token and not join and not outer_apply and not cross_apply: 3727 return None 3728 3729 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3730 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3731 kwargs["expressions"] = self._parse_csv( 3732 lambda: self._parse_table(parse_bracket=parse_bracket) 3733 ) 3734 3735 if method: 3736 kwargs["method"] = method.text 3737 if side: 3738 kwargs["side"] = side.text 3739 if kind: 3740 kwargs["kind"] = kind.text 3741 if hint: 3742 kwargs["hint"] = hint 3743 3744 if self._match(TokenType.MATCH_CONDITION): 3745 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3746 3747 if self._match(TokenType.ON): 3748 kwargs["on"] = self._parse_assignment() 3749 elif self._match(TokenType.USING): 3750 kwargs["using"] = self._parse_using_identifiers() 3751 elif ( 3752 not (outer_apply or cross_apply) 3753 and not isinstance(kwargs["this"], exp.Unnest) 3754 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3755 ): 3756 index = self._index 3757 joins: t.Optional[list] = list(self._parse_joins()) 3758 3759 if joins and self._match(TokenType.ON): 3760 kwargs["on"] = self._parse_assignment() 3761 elif joins and self._match(TokenType.USING): 3762 kwargs["using"] = self._parse_using_identifiers() 3763 else: 3764 joins = None 3765 self._retreat(index) 3766 3767 kwargs["this"].set("joins", joins if joins else None) 3768 3769 kwargs["pivots"] = self._parse_pivots() 3770 3771 comments = [c for token in (method, side, kind) if token for c in token.comments] 3772 return self.expression(exp.Join, comments=comments, **kwargs) 3773 3774 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3775 this = self._parse_assignment() 3776 3777 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3778 return this 3779 3780 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3781 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3782 3783 return this 3784 3785 def _parse_index_params(self) -> exp.IndexParameters: 3786 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3787 3788 if self._match(TokenType.L_PAREN, advance=False): 3789 columns = self._parse_wrapped_csv(self._parse_with_operator) 3790 else: 3791 columns = None 3792 3793 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3794 partition_by = self._parse_partition_by() 3795 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3796 tablespace = ( 3797 self._parse_var(any_token=True) 3798 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3799 else None 3800 ) 3801 where = self._parse_where() 3802 3803 on = self._parse_field() if self._match(TokenType.ON) else None 3804 3805 return self.expression( 3806 exp.IndexParameters, 3807 using=using, 3808 columns=columns, 3809 include=include, 3810 partition_by=partition_by, 3811 where=where, 3812 with_storage=with_storage, 3813 tablespace=tablespace, 3814 on=on, 3815 ) 3816 3817 def _parse_index( 3818 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3819 ) -> t.Optional[exp.Index]: 3820 if index or anonymous: 3821 unique = None 3822 primary = None 3823 amp = None 3824 3825 self._match(TokenType.ON) 3826 self._match(TokenType.TABLE) # hive 3827 table = self._parse_table_parts(schema=True) 3828 else: 3829 unique = self._match(TokenType.UNIQUE) 3830 primary 
= self._match_text_seq("PRIMARY") 3831 amp = self._match_text_seq("AMP") 3832 3833 if not self._match(TokenType.INDEX): 3834 return None 3835 3836 index = self._parse_id_var() 3837 table = None 3838 3839 params = self._parse_index_params() 3840 3841 return self.expression( 3842 exp.Index, 3843 this=index, 3844 table=table, 3845 unique=unique, 3846 primary=primary, 3847 amp=amp, 3848 params=params, 3849 ) 3850 3851 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3852 hints: t.List[exp.Expression] = [] 3853 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3854 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3855 hints.append( 3856 self.expression( 3857 exp.WithTableHint, 3858 expressions=self._parse_csv( 3859 lambda: self._parse_function() or self._parse_var(any_token=True) 3860 ), 3861 ) 3862 ) 3863 self._match_r_paren() 3864 else: 3865 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3866 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3867 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3868 3869 self._match_set((TokenType.INDEX, TokenType.KEY)) 3870 if self._match(TokenType.FOR): 3871 hint.set("target", self._advance_any() and self._prev.text.upper()) 3872 3873 hint.set("expressions", self._parse_wrapped_id_vars()) 3874 hints.append(hint) 3875 3876 return hints or None 3877 3878 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3879 return ( 3880 (not schema and self._parse_function(optional_parens=False)) 3881 or self._parse_id_var(any_token=False) 3882 or self._parse_string_as_identifier() 3883 or self._parse_placeholder() 3884 ) 3885 3886 def _parse_table_parts( 3887 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3888 ) -> exp.Table: 3889 catalog = None 3890 db = None 3891 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3892 3893 while self._match(TokenType.DOT): 3894 if catalog: 3895 # This allows nesting the table in arbitrarily many dot expressions if needed 3896 table = self.expression( 3897 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3898 ) 3899 else: 3900 catalog = db 3901 db = table 3902 # "" used for tsql FROM a..b case 3903 table = self._parse_table_part(schema=schema) or "" 3904 3905 if ( 3906 wildcard 3907 and self._is_connected() 3908 and (isinstance(table, exp.Identifier) or not table) 3909 and self._match(TokenType.STAR) 3910 ): 3911 if isinstance(table, exp.Identifier): 3912 table.args["this"] += "*" 3913 else: 3914 table = exp.Identifier(this="*") 3915 3916 # We bubble up comments from the Identifier to the Table 3917 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3918 3919 if is_db_reference: 3920 catalog = db 3921 db = table 3922 table = None 3923 3924 if not table and not is_db_reference: 3925 self.raise_error(f"Expected table name but got {self._curr}") 3926 if not db and is_db_reference: 3927 self.raise_error(f"Expected database name but got {self._curr}") 3928 3929 table = self.expression( 3930 exp.Table, 3931 comments=comments, 3932 this=table, 3933 db=db, 3934 catalog=catalog, 3935 ) 3936 3937 changes = self._parse_changes() 3938 if changes: 3939 table.set("changes", changes) 3940 3941 at_before = self._parse_historical_data() 3942 if at_before: 3943 table.set("when", at_before) 3944 3945 pivots = self._parse_pivots() 3946 if pivots: 3947 table.set("pivots", pivots) 3948 3949 return table 3950 3951 def 
_parse_table( 3952 self, 3953 schema: bool = False, 3954 joins: bool = False, 3955 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3956 parse_bracket: bool = False, 3957 is_db_reference: bool = False, 3958 parse_partition: bool = False, 3959 ) -> t.Optional[exp.Expression]: 3960 lateral = self._parse_lateral() 3961 if lateral: 3962 return lateral 3963 3964 unnest = self._parse_unnest() 3965 if unnest: 3966 return unnest 3967 3968 values = self._parse_derived_table_values() 3969 if values: 3970 return values 3971 3972 subquery = self._parse_select(table=True) 3973 if subquery: 3974 if not subquery.args.get("pivots"): 3975 subquery.set("pivots", self._parse_pivots()) 3976 return subquery 3977 3978 bracket = parse_bracket and self._parse_bracket(None) 3979 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3980 3981 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3982 self._parse_table 3983 ) 3984 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3985 3986 only = self._match(TokenType.ONLY) 3987 3988 this = t.cast( 3989 exp.Expression, 3990 bracket 3991 or rows_from 3992 or self._parse_bracket( 3993 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3994 ), 3995 ) 3996 3997 if only: 3998 this.set("only", only) 3999 4000 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4001 self._match_text_seq("*") 4002 4003 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4004 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4005 this.set("partition", self._parse_partition()) 4006 4007 if schema: 4008 return self._parse_schema(this=this) 4009 4010 version = self._parse_version() 4011 4012 if version: 4013 this.set("version", version) 4014 4015 if self.dialect.ALIAS_POST_TABLESAMPLE: 4016 this.set("sample", self._parse_table_sample()) 4017 4018 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4019 if alias: 4020 this.set("alias", alias) 4021 4022 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4023 return self.expression( 4024 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4025 ) 4026 4027 this.set("hints", self._parse_table_hints()) 4028 4029 if not this.args.get("pivots"): 4030 this.set("pivots", self._parse_pivots()) 4031 4032 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4033 this.set("sample", self._parse_table_sample()) 4034 4035 if joins: 4036 for join in self._parse_joins(): 4037 this.append("joins", join) 4038 4039 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4040 this.set("ordinality", True) 4041 this.set("alias", self._parse_table_alias()) 4042 4043 return this 4044 4045 def _parse_version(self) -> t.Optional[exp.Version]: 4046 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4047 this = "TIMESTAMP" 4048 elif self._match(TokenType.VERSION_SNAPSHOT): 4049 this = "VERSION" 4050 else: 4051 return None 4052 4053 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4054 kind = self._prev.text.upper() 4055 start = self._parse_bitwise() 4056 self._match_texts(("TO", "AND")) 4057 end = self._parse_bitwise() 4058 expression: t.Optional[exp.Expression] = self.expression( 4059 exp.Tuple, expressions=[start, end] 4060 ) 4061 elif self._match_text_seq("CONTAINED", "IN"): 4062 kind = "CONTAINED IN" 4063 expression = self.expression( 4064 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4065 ) 4066 elif 
self._match(TokenType.ALL): 4067 kind = "ALL" 4068 expression = None 4069 else: 4070 self._match_text_seq("AS", "OF") 4071 kind = "AS OF" 4072 expression = self._parse_type() 4073 4074 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4075 4076 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4077 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4078 index = self._index 4079 historical_data = None 4080 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4081 this = self._prev.text.upper() 4082 kind = ( 4083 self._match(TokenType.L_PAREN) 4084 and self._match_texts(self.HISTORICAL_DATA_KIND) 4085 and self._prev.text.upper() 4086 ) 4087 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4088 4089 if expression: 4090 self._match_r_paren() 4091 historical_data = self.expression( 4092 exp.HistoricalData, this=this, kind=kind, expression=expression 4093 ) 4094 else: 4095 self._retreat(index) 4096 4097 return historical_data 4098 4099 def _parse_changes(self) -> t.Optional[exp.Changes]: 4100 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4101 return None 4102 4103 information = self._parse_var(any_token=True) 4104 self._match_r_paren() 4105 4106 return self.expression( 4107 exp.Changes, 4108 information=information, 4109 at_before=self._parse_historical_data(), 4110 end=self._parse_historical_data(), 4111 ) 4112 4113 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4114 if not self._match(TokenType.UNNEST): 4115 return None 4116 4117 expressions = self._parse_wrapped_csv(self._parse_equality) 4118 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4119 4120 alias = self._parse_table_alias() if with_alias else None 4121 4122 if alias: 4123 if self.dialect.UNNEST_COLUMN_ONLY: 4124 if alias.args.get("columns"): 4125 self.raise_error("Unexpected extra column alias in unnest.") 4126 4127 alias.set("columns", [alias.this]) 4128 alias.set("this", None) 4129 4130 columns = alias.args.get("columns") or [] 4131 if offset and len(expressions) < len(columns): 4132 offset = columns.pop() 4133 4134 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4135 self._match(TokenType.ALIAS) 4136 offset = self._parse_id_var( 4137 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4138 ) or exp.to_identifier("offset") 4139 4140 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4141 4142 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4143 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4144 if not is_derived and not ( 4145 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4146 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4147 ): 4148 return None 4149 4150 expressions = self._parse_csv(self._parse_value) 4151 alias = self._parse_table_alias() 4152 4153 if is_derived: 4154 self._match_r_paren() 4155 4156 return self.expression( 4157 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4158 ) 4159 4160 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4161 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4162 as_modifier and self._match_text_seq("USING", "SAMPLE") 4163 ): 4164 return None 4165 4166 bucket_numerator = None 4167 bucket_denominator = None 4168 bucket_field = None 4169 percent = None 4170 size = None 4171 seed = None 4172 4173 method = 
self._parse_var(tokens=(TokenType.ROW,), upper=True) 4174 matched_l_paren = self._match(TokenType.L_PAREN) 4175 4176 if self.TABLESAMPLE_CSV: 4177 num = None 4178 expressions = self._parse_csv(self._parse_primary) 4179 else: 4180 expressions = None 4181 num = ( 4182 self._parse_factor() 4183 if self._match(TokenType.NUMBER, advance=False) 4184 else self._parse_primary() or self._parse_placeholder() 4185 ) 4186 4187 if self._match_text_seq("BUCKET"): 4188 bucket_numerator = self._parse_number() 4189 self._match_text_seq("OUT", "OF") 4190 bucket_denominator = self._parse_number() 4191 self._match(TokenType.ON) 4192 bucket_field = self._parse_field() 4193 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4194 percent = num 4195 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4196 size = num 4197 else: 4198 percent = num 4199 4200 if matched_l_paren: 4201 self._match_r_paren() 4202 4203 if self._match(TokenType.L_PAREN): 4204 method = self._parse_var(upper=True) 4205 seed = self._match(TokenType.COMMA) and self._parse_number() 4206 self._match_r_paren() 4207 elif self._match_texts(("SEED", "REPEATABLE")): 4208 seed = self._parse_wrapped(self._parse_number) 4209 4210 if not method and self.DEFAULT_SAMPLING_METHOD: 4211 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4212 4213 return self.expression( 4214 exp.TableSample, 4215 expressions=expressions, 4216 method=method, 4217 bucket_numerator=bucket_numerator, 4218 bucket_denominator=bucket_denominator, 4219 bucket_field=bucket_field, 4220 percent=percent, 4221 size=size, 4222 seed=seed, 4223 ) 4224 4225 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4226 return list(iter(self._parse_pivot, None)) or None 4227 4228 def _parse_joins(self) -> t.Iterator[exp.Join]: 4229 return iter(self._parse_join, None) 4230 4231 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4232 if not self._match(TokenType.INTO): 4233 return None 4234 4235 return self.expression( 4236 exp.UnpivotColumns, 4237 this=self._match_text_seq("NAME") and self._parse_column(), 4238 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4239 ) 4240 4241 # https://duckdb.org/docs/sql/statements/pivot 4242 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4243 def _parse_on() -> t.Optional[exp.Expression]: 4244 this = self._parse_bitwise() 4245 4246 if self._match(TokenType.IN): 4247 # PIVOT ... ON col IN (row_val1, row_val2) 4248 return self._parse_in(this) 4249 if self._match(TokenType.ALIAS, advance=False): 4250 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4251 return self._parse_alias(this) 4252 4253 return this 4254 4255 this = self._parse_table() 4256 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4257 into = self._parse_unpivot_columns() 4258 using = self._match(TokenType.USING) and self._parse_csv( 4259 lambda: self._parse_alias(self._parse_function()) 4260 ) 4261 group = self._parse_group() 4262 4263 return self.expression( 4264 exp.Pivot, 4265 this=this, 4266 expressions=expressions, 4267 using=using, 4268 group=group, 4269 unpivot=is_unpivot, 4270 into=into, 4271 ) 4272 4273 def _parse_pivot_in(self) -> exp.In: 4274 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4275 this = self._parse_select_or_expression() 4276 4277 self._match(TokenType.ALIAS) 4278 alias = self._parse_bitwise() 4279 if alias: 4280 if isinstance(alias, exp.Column) and not alias.db: 4281 alias = alias.this 4282 return self.expression(exp.PivotAlias, this=this, alias=alias) 4283 4284 return this 4285 4286 value = self._parse_column() 4287 4288 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4289 self.raise_error("Expecting IN (") 4290 4291 if self._match(TokenType.ANY): 4292 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4293 else: 4294 exprs = self._parse_csv(_parse_aliased_expression) 4295 4296 self._match_r_paren() 4297 return self.expression(exp.In, this=value, expressions=exprs) 4298 4299 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4300 index = self._index 4301 include_nulls = None 4302 4303 if self._match(TokenType.PIVOT): 4304 unpivot = False 4305 elif self._match(TokenType.UNPIVOT): 4306 unpivot = True 4307 4308 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4309 if self._match_text_seq("INCLUDE", "NULLS"): 4310 include_nulls = True 4311 elif self._match_text_seq("EXCLUDE", "NULLS"): 4312 include_nulls = False 4313 else: 4314 return None 4315 4316 expressions = [] 4317 4318 if not self._match(TokenType.L_PAREN): 4319 self._retreat(index) 4320 return None 4321 4322 if unpivot: 4323 expressions = self._parse_csv(self._parse_column) 4324 else: 4325 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4326 4327 if not expressions: 4328 self.raise_error("Failed to parse PIVOT's aggregation list") 4329 4330 if not self._match(TokenType.FOR): 4331 self.raise_error("Expecting FOR") 4332 4333 fields = [] 4334 while True: 4335 field = self._try_parse(self._parse_pivot_in) 4336 if not field: 4337 break 4338 fields.append(field) 4339 4340 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4341 self._parse_bitwise 4342 ) 4343 4344 group = self._parse_group() 4345 4346 self._match_r_paren() 4347 4348 pivot = self.expression( 4349 exp.Pivot, 4350 expressions=expressions, 4351 fields=fields, 4352 unpivot=unpivot, 4353 include_nulls=include_nulls, 4354 default_on_null=default_on_null, 4355 group=group, 4356 ) 4357 4358 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4359 pivot.set("alias", self._parse_table_alias()) 4360 4361 if not unpivot: 4362 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4363 4364 columns: t.List[exp.Expression] = [] 4365 all_fields = [] 4366 for pivot_field in pivot.fields: 4367 pivot_field_expressions = pivot_field.expressions 4368 4369 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
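# ---------------------------------------------------------------------
# Editor's aside - a hedged, illustrative sketch, not parser source. It
# shows how the column inference implemented below surfaces through the
# public API; the query and the "snowflake" dialect are assumptions
# chosen purely for illustration.
#
#   import sqlglot
#   from sqlglot import exp
#
#   ast = sqlglot.parse_one(
#       "SELECT * FROM t PIVOT(SUM(v) AS total FOR year IN (2000, 2010))",
#       read="snowflake",
#   )
#   pivot = ast.find(exp.Pivot)
#   # pivot.args.get("columns") holds identifiers derived from the IN values
#   # and the aggregation alias (joined with "_"), whereas with
#   # FOR year IN (ANY ORDER BY year) the PivotAny guard below skips the
#   # field, so no columns are inferred.
# ---------------------------------------------------------------------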
4370 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4371 continue 4372 4373 all_fields.append( 4374 [ 4375 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4376 for fld in pivot_field_expressions 4377 ] 4378 ) 4379 4380 if all_fields: 4381 if names: 4382 all_fields.append(names) 4383 4384 # Generate all possible combinations of the pivot columns 4385 # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4386 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4387 for fld_parts_tuple in itertools.product(*all_fields): 4388 fld_parts = list(fld_parts_tuple) 4389 4390 if names and self.PREFIXED_PIVOT_COLUMNS: 4391 # Move the "name" to the front of the list 4392 fld_parts.insert(0, fld_parts.pop(-1)) 4393 4394 columns.append(exp.to_identifier("_".join(fld_parts))) 4395 4396 pivot.set("columns", columns) 4397 4398 return pivot 4399 4400 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4401 return [agg.alias for agg in aggregations if agg.alias] 4402 4403 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4404 if not skip_where_token and not self._match(TokenType.PREWHERE): 4405 return None 4406 4407 return self.expression( 4408 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4409 ) 4410 4411 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4412 if not skip_where_token and not self._match(TokenType.WHERE): 4413 return None 4414 4415 return self.expression( 4416 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4417 ) 4418 4419 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4420 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4421 return None 4422 4423 elements: t.Dict[str, t.Any] = defaultdict(list) 4424 4425 if self._match(TokenType.ALL): 4426 elements["all"] = True 4427 elif self._match(TokenType.DISTINCT): 4428 elements["all"] = False 4429 4430 while True: 4431 index = self._index 4432 4433 elements["expressions"].extend( 4434 self._parse_csv( 4435 lambda: None 4436 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4437 else self._parse_assignment() 4438 ) 4439 ) 4440 4441 before_with_index = self._index 4442 with_prefix = self._match(TokenType.WITH) 4443 4444 if self._match(TokenType.ROLLUP): 4445 elements["rollup"].append( 4446 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4447 ) 4448 elif self._match(TokenType.CUBE): 4449 elements["cube"].append( 4450 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4451 ) 4452 elif self._match(TokenType.GROUPING_SETS): 4453 elements["grouping_sets"].append( 4454 self.expression( 4455 exp.GroupingSets, 4456 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4457 ) 4458 ) 4459 elif self._match_text_seq("TOTALS"): 4460 elements["totals"] = True # type: ignore 4461 4462 if before_with_index <= self._index <= before_with_index + 1: 4463 self._retreat(before_with_index) 4464 break 4465 4466 if index == self._index: 4467 break 4468 4469 return self.expression(exp.Group, **elements) # type: ignore 4470 4471 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4472 return self.expression( 4473 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4474 ) 4475 4476 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4477 if 
self._match(TokenType.L_PAREN): 4478 grouping_set = self._parse_csv(self._parse_column) 4479 self._match_r_paren() 4480 return self.expression(exp.Tuple, expressions=grouping_set) 4481 4482 return self._parse_column() 4483 4484 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4485 if not skip_having_token and not self._match(TokenType.HAVING): 4486 return None 4487 return self.expression(exp.Having, this=self._parse_assignment()) 4488 4489 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4490 if not self._match(TokenType.QUALIFY): 4491 return None 4492 return self.expression(exp.Qualify, this=self._parse_assignment()) 4493 4494 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4495 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4496 exp.Prior, this=self._parse_bitwise() 4497 ) 4498 connect = self._parse_assignment() 4499 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4500 return connect 4501 4502 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4503 if skip_start_token: 4504 start = None 4505 elif self._match(TokenType.START_WITH): 4506 start = self._parse_assignment() 4507 else: 4508 return None 4509 4510 self._match(TokenType.CONNECT_BY) 4511 nocycle = self._match_text_seq("NOCYCLE") 4512 connect = self._parse_connect_with_prior() 4513 4514 if not start and self._match(TokenType.START_WITH): 4515 start = self._parse_assignment() 4516 4517 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4518 4519 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4520 this = self._parse_id_var(any_token=True) 4521 if self._match(TokenType.ALIAS): 4522 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4523 return this 4524 4525 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4526 if self._match_text_seq("INTERPOLATE"): 4527 return self._parse_wrapped_csv(self._parse_name_as_expression) 4528 return None 4529 4530 def _parse_order( 4531 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4532 ) -> t.Optional[exp.Expression]: 4533 siblings = None 4534 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4535 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4536 return this 4537 4538 siblings = True 4539 4540 return self.expression( 4541 exp.Order, 4542 this=this, 4543 expressions=self._parse_csv(self._parse_ordered), 4544 siblings=siblings, 4545 ) 4546 4547 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4548 if not self._match(token): 4549 return None 4550 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4551 4552 def _parse_ordered( 4553 self, parse_method: t.Optional[t.Callable] = None 4554 ) -> t.Optional[exp.Ordered]: 4555 this = parse_method() if parse_method else self._parse_assignment() 4556 if not this: 4557 return None 4558 4559 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4560 this = exp.var("ALL") 4561 4562 asc = self._match(TokenType.ASC) 4563 desc = self._match(TokenType.DESC) or (asc and False) 4564 4565 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4566 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4567 4568 nulls_first = is_nulls_first or False 4569 explicitly_null_ordered = is_nulls_first or is_nulls_last 4570 4571 if ( 4572 not explicitly_null_ordered 4573 and ( 4574 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4575 or 
(desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4576 ) 4577 and self.dialect.NULL_ORDERING != "nulls_are_last" 4578 ): 4579 nulls_first = True 4580 4581 if self._match_text_seq("WITH", "FILL"): 4582 with_fill = self.expression( 4583 exp.WithFill, 4584 **{ # type: ignore 4585 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4586 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4587 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4588 "interpolate": self._parse_interpolate(), 4589 }, 4590 ) 4591 else: 4592 with_fill = None 4593 4594 return self.expression( 4595 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4596 ) 4597 4598 def _parse_limit_options(self) -> exp.LimitOptions: 4599 percent = self._match(TokenType.PERCENT) 4600 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4601 self._match_text_seq("ONLY") 4602 with_ties = self._match_text_seq("WITH", "TIES") 4603 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4604 4605 def _parse_limit( 4606 self, 4607 this: t.Optional[exp.Expression] = None, 4608 top: bool = False, 4609 skip_limit_token: bool = False, 4610 ) -> t.Optional[exp.Expression]: 4611 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4612 comments = self._prev_comments 4613 if top: 4614 limit_paren = self._match(TokenType.L_PAREN) 4615 expression = self._parse_term() if limit_paren else self._parse_number() 4616 4617 if limit_paren: 4618 self._match_r_paren() 4619 4620 limit_options = self._parse_limit_options() 4621 else: 4622 limit_options = None 4623 expression = self._parse_term() 4624 4625 if self._match(TokenType.COMMA): 4626 offset = expression 4627 expression = self._parse_term() 4628 else: 4629 offset = None 4630 4631 limit_exp = self.expression( 4632 exp.Limit, 4633 this=this, 4634 expression=expression, 4635 offset=offset, 4636 comments=comments, 4637 limit_options=limit_options, 4638 expressions=self._parse_limit_by(), 4639 ) 4640 4641 return limit_exp 4642 4643 if self._match(TokenType.FETCH): 4644 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4645 direction = self._prev.text.upper() if direction else "FIRST" 4646 4647 count = self._parse_field(tokens=self.FETCH_TOKENS) 4648 4649 return self.expression( 4650 exp.Fetch, 4651 direction=direction, 4652 count=count, 4653 limit_options=self._parse_limit_options(), 4654 ) 4655 4656 return this 4657 4658 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4659 if not self._match(TokenType.OFFSET): 4660 return this 4661 4662 count = self._parse_term() 4663 self._match_set((TokenType.ROW, TokenType.ROWS)) 4664 4665 return self.expression( 4666 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4667 ) 4668 4669 def _can_parse_limit_or_offset(self) -> bool: 4670 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4671 return False 4672 4673 index = self._index 4674 result = bool( 4675 self._try_parse(self._parse_limit, retreat=True) 4676 or self._try_parse(self._parse_offset, retreat=True) 4677 ) 4678 self._retreat(index) 4679 return result 4680 4681 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4682 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4683 4684 def _parse_locks(self) -> t.List[exp.Lock]: 4685 locks = [] 4686 while True: 4687 if self._match_text_seq("FOR", "UPDATE"): 4688 update = True 4689 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4690 "LOCK", "IN", "SHARE", "MODE" 4691 ): 4692 update = False 4693 else: 4694 break 4695 4696 expressions = None 4697 if self._match_text_seq("OF"): 4698 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4699 4700 wait: t.Optional[bool | exp.Expression] = None 4701 if self._match_text_seq("NOWAIT"): 4702 wait = True 4703 elif self._match_text_seq("WAIT"): 4704 wait = self._parse_primary() 4705 elif self._match_text_seq("SKIP", "LOCKED"): 4706 wait = False 4707 4708 locks.append( 4709 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4710 ) 4711 4712 return locks 4713 4714 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4715 start = self._index 4716 _, side_token, kind_token = self._parse_join_parts() 4717 4718 side = side_token.text if side_token else None 4719 kind = kind_token.text if kind_token else None 4720 4721 if not self._match_set(self.SET_OPERATIONS): 4722 self._retreat(start) 4723 return None 4724 4725 token_type = self._prev.token_type 4726 4727 if token_type == TokenType.UNION: 4728 operation: t.Type[exp.SetOperation] = exp.Union 4729 elif token_type == TokenType.EXCEPT: 4730 operation = exp.Except 4731 else: 4732 operation = exp.Intersect 4733 4734 comments = self._prev.comments 4735 4736 if self._match(TokenType.DISTINCT): 4737 distinct: t.Optional[bool] = True 4738 elif self._match(TokenType.ALL): 4739 distinct = False 4740 else: 4741 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4742 if distinct is None: 4743 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4744 4745 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4746 "STRICT", "CORRESPONDING" 4747 ) 4748 if self._match_text_seq("CORRESPONDING"): 4749 by_name = True 4750 if not side and not kind: 4751 kind = "INNER" 4752 4753 on_column_list = None 4754 if by_name and self._match_texts(("ON", "BY")): 4755 on_column_list = self._parse_wrapped_csv(self._parse_column) 4756 4757 expression = self._parse_select(nested=True, parse_set_operation=False) 4758 4759 return self.expression( 4760 operation, 4761 comments=comments, 4762 this=this, 4763 distinct=distinct, 4764 by_name=by_name, 4765 expression=expression, 4766 side=side, 4767 kind=kind, 4768 on=on_column_list, 4769 ) 4770 4771 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4772 while this: 4773 setop = self.parse_set_operation(this) 4774 if not setop: 4775 break 4776 this = setop 4777 4778 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4779 expression = this.expression 4780 4781 if expression: 4782 for arg in self.SET_OP_MODIFIERS: 4783 expr = expression.args.get(arg) 4784 if expr: 4785 this.set(arg, expr.pop()) 4786 4787 return this 4788 4789 def _parse_expression(self) -> t.Optional[exp.Expression]: 4790 return self._parse_alias(self._parse_assignment()) 4791 4792 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4793 this = self._parse_disjunction() 4794 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4795 # This allows us to parse <non-identifier token> := <expr> 4796 this = exp.column( 4797 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4798 ) 4799 4800 while self._match_set(self.ASSIGNMENT): 4801 if isinstance(this, exp.Column) and len(this.parts) == 1: 4802 this = this.this 4803 4804 this = self.expression( 4805 
self.ASSIGNMENT[self._prev.token_type], 4806 this=this, 4807 comments=self._prev_comments, 4808 expression=self._parse_assignment(), 4809 ) 4810 4811 return this 4812 4813 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4814 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4815 4816 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4817 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4818 4819 def _parse_equality(self) -> t.Optional[exp.Expression]: 4820 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4821 4822 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4823 return self._parse_tokens(self._parse_range, self.COMPARISON) 4824 4825 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4826 this = this or self._parse_bitwise() 4827 negate = self._match(TokenType.NOT) 4828 4829 if self._match_set(self.RANGE_PARSERS): 4830 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4831 if not expression: 4832 return this 4833 4834 this = expression 4835 elif self._match(TokenType.ISNULL): 4836 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4837 4838 # Postgres supports ISNULL and NOTNULL for conditions. 4839 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4840 if self._match(TokenType.NOTNULL): 4841 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4842 this = self.expression(exp.Not, this=this) 4843 4844 if negate: 4845 this = self._negate_range(this) 4846 4847 if self._match(TokenType.IS): 4848 this = self._parse_is(this) 4849 4850 return this 4851 4852 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4853 if not this: 4854 return this 4855 4856 return self.expression(exp.Not, this=this) 4857 4858 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4859 index = self._index - 1 4860 negate = self._match(TokenType.NOT) 4861 4862 if self._match_text_seq("DISTINCT", "FROM"): 4863 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4864 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4865 4866 if self._match(TokenType.JSON): 4867 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4868 4869 if self._match_text_seq("WITH"): 4870 _with = True 4871 elif self._match_text_seq("WITHOUT"): 4872 _with = False 4873 else: 4874 _with = None 4875 4876 unique = self._match(TokenType.UNIQUE) 4877 self._match_text_seq("KEYS") 4878 expression: t.Optional[exp.Expression] = self.expression( 4879 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4880 ) 4881 else: 4882 expression = self._parse_primary() or self._parse_null() 4883 if not expression: 4884 self._retreat(index) 4885 return None 4886 4887 this = self.expression(exp.Is, this=this, expression=expression) 4888 return self.expression(exp.Not, this=this) if negate else this 4889 4890 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4891 unnest = self._parse_unnest(with_alias=False) 4892 if unnest: 4893 this = self.expression(exp.In, this=this, unnest=unnest) 4894 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4895 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4896 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4897 4898 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4899 this = self.expression(exp.In, 
this=this, query=expressions[0].subquery(copy=False)) 4900 else: 4901 this = self.expression(exp.In, this=this, expressions=expressions) 4902 4903 if matched_l_paren: 4904 self._match_r_paren(this) 4905 elif not self._match(TokenType.R_BRACKET, expression=this): 4906 self.raise_error("Expecting ]") 4907 else: 4908 this = self.expression(exp.In, this=this, field=self._parse_column()) 4909 4910 return this 4911 4912 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4913 low = self._parse_bitwise() 4914 self._match(TokenType.AND) 4915 high = self._parse_bitwise() 4916 return self.expression(exp.Between, this=this, low=low, high=high) 4917 4918 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4919 if not self._match(TokenType.ESCAPE): 4920 return this 4921 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4922 4923 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4924 index = self._index 4925 4926 if not self._match(TokenType.INTERVAL) and match_interval: 4927 return None 4928 4929 if self._match(TokenType.STRING, advance=False): 4930 this = self._parse_primary() 4931 else: 4932 this = self._parse_term() 4933 4934 if not this or ( 4935 isinstance(this, exp.Column) 4936 and not this.table 4937 and not this.this.quoted 4938 and this.name.upper() == "IS" 4939 ): 4940 self._retreat(index) 4941 return None 4942 4943 unit = self._parse_function() or ( 4944 not self._match(TokenType.ALIAS, advance=False) 4945 and self._parse_var(any_token=True, upper=True) 4946 ) 4947 4948 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4949 # each INTERVAL expression into this canonical form so it's easy to transpile 4950 if this and this.is_number: 4951 this = exp.Literal.string(this.to_py()) 4952 elif this and this.is_string: 4953 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4954 if parts and unit: 4955 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4956 unit = None 4957 self._retreat(self._index - 1) 4958 4959 if len(parts) == 1: 4960 this = exp.Literal.string(parts[0][0]) 4961 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4962 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4963 unit = self.expression( 4964 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4965 ) 4966 4967 interval = self.expression(exp.Interval, this=this, unit=unit) 4968 4969 index = self._index 4970 self._match(TokenType.PLUS) 4971 4972 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4973 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4974 return self.expression( 4975 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4976 ) 4977 4978 self._retreat(index) 4979 return interval 4980 4981 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4982 this = self._parse_term() 4983 4984 while True: 4985 if self._match_set(self.BITWISE): 4986 this = self.expression( 4987 self.BITWISE[self._prev.token_type], 4988 this=this, 4989 expression=self._parse_term(), 4990 ) 4991 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4992 this = self.expression( 4993 exp.DPipe, 4994 this=this, 4995 expression=self._parse_term(), 4996 safe=not self.dialect.STRICT_STRING_CONCAT, 4997 ) 4998 elif self._match(TokenType.DQMARK): 4999 this = self.expression( 5000 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5001 ) 5002 elif self._match_pair(TokenType.LT, TokenType.LT): 5003 this = self.expression( 5004 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5005 ) 5006 elif self._match_pair(TokenType.GT, TokenType.GT): 5007 this = self.expression( 5008 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5009 ) 5010 else: 5011 break 5012 5013 return this 5014 5015 def _parse_term(self) -> t.Optional[exp.Expression]: 5016 this = self._parse_factor() 5017 5018 while self._match_set(self.TERM): 5019 klass = self.TERM[self._prev.token_type] 5020 comments = self._prev_comments 5021 expression = self._parse_factor() 5022 5023 this = self.expression(klass, this=this, comments=comments, expression=expression) 5024 5025 if isinstance(this, exp.Collate): 5026 expr = this.expression 5027 5028 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5029 # fallback to Identifier / Var 5030 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5031 ident = expr.this 5032 if isinstance(ident, exp.Identifier): 5033 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5034 5035 return this 5036 5037 def _parse_factor(self) -> t.Optional[exp.Expression]: 5038 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5039 this = parse_method() 5040 5041 while self._match_set(self.FACTOR): 5042 klass = self.FACTOR[self._prev.token_type] 5043 comments = self._prev_comments 5044 expression = parse_method() 5045 5046 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5047 self._retreat(self._index - 1) 5048 return this 5049 5050 this = self.expression(klass, this=this, comments=comments, expression=expression) 5051 5052 if isinstance(this, exp.Div): 5053 this.args["typed"] = self.dialect.TYPED_DIVISION 5054 this.args["safe"] = self.dialect.SAFE_DIVISION 5055 5056 return this 5057 5058 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5059 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5060 5061 def _parse_unary(self) -> t.Optional[exp.Expression]: 5062 if self._match_set(self.UNARY_PARSERS): 5063 return self.UNARY_PARSERS[self._prev.token_type](self) 5064 return self._parse_at_time_zone(self._parse_type()) 5065 5066 def _parse_type( 5067 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5068 ) -> t.Optional[exp.Expression]: 5069 interval = parse_interval and self._parse_interval() 5070 if interval: 5071 return interval 5072 5073 index = self._index 5074 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5075 
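# ---------------------------------------------------------------------
# Editor's aside - a hedged, illustrative sketch, not parser source. It
# demonstrates the INTERVAL canonicalization performed by _parse_interval
# (which _parse_type tries first, just above) via the public API; the
# default dialect is assumed and the shapes described are indicative
# rather than exact reprs.
#
#   import sqlglot
#   from sqlglot import exp
#
#   ivl = sqlglot.parse_one("SELECT INTERVAL 5 day").find(exp.Interval)
#   # ivl.this is the string Literal '5' and ivl.args.get("unit") is
#   # Var(this='DAY'), i.e. the canonical INTERVAL '<value>' <UNIT> form;
#   # a chain such as INTERVAL '1' DAY '2' HOUR instead parses into an
#   # exp.Add of two Interval nodes, per the PLUS handling above.
# ---------------------------------------------------------------------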
5076 # _parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5077 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5078 if isinstance(data_type, exp.Cast): 5079 # This constructor can contain ops directly after it, for instance struct unnesting: 5080 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5081 return self._parse_column_ops(data_type) 5082 5083 if data_type: 5084 index2 = self._index 5085 this = self._parse_primary() 5086 5087 if isinstance(this, exp.Literal): 5088 this = self._parse_column_ops(this) 5089 5090 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5091 if parser: 5092 return parser(self, this, data_type) 5093 5094 return self.expression(exp.Cast, this=this, to=data_type) 5095 5096 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5097 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5098 # 5099 # If the index difference here is greater than 1, that means the parser itself must have 5100 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5101 # 5102 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5103 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5104 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5105 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5106 # 5107 # In these cases, we don't really want to return the converted type, but instead retreat 5108 # and try to parse a Column or Identifier in the section below. 5109 if data_type.expressions and index2 - index > 1: 5110 self._retreat(index2) 5111 return self._parse_column_ops(data_type) 5112 5113 self._retreat(index) 5114 5115 if fallback_to_identifier: 5116 return self._parse_id_var() 5117 5118 this = self._parse_column() 5119 return this and self._parse_column_ops(this) 5120 5121 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5122 this = self._parse_type() 5123 if not this: 5124 return None 5125 5126 if isinstance(this, exp.Column) and not this.table: 5127 this = exp.var(this.name.upper()) 5128 5129 return self.expression( 5130 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5131 ) 5132 5133 def _parse_types( 5134 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5135 ) -> t.Optional[exp.Expression]: 5136 index = self._index 5137 5138 this: t.Optional[exp.Expression] = None 5139 prefix = self._match_text_seq("SYSUDTLIB", ".") 5140 5141 if not self._match_set(self.TYPE_TOKENS): 5142 identifier = allow_identifiers and self._parse_id_var( 5143 any_token=False, tokens=(TokenType.VAR,) 5144 ) 5145 if isinstance(identifier, exp.Identifier): 5146 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5147 5148 if len(tokens) != 1: 5149 self.raise_error("Unexpected identifier", self._prev) 5150 5151 if tokens[0].token_type in self.TYPE_TOKENS: 5152 self._prev = tokens[0] 5153 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5154 type_name = identifier.name 5155 5156 while self._match(TokenType.DOT): 5157 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5158 5159 this = exp.DataType.build(type_name, udt=True) 5160 else: 5161 self._retreat(self._index - 1) 5162 return None 5163 else: 5164 return None 5165 5166 type_token = self._prev.token_type 5167 
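# ---------------------------------------------------------------------
# Editor's aside - a hedged, illustrative sketch, not parser source. It
# exercises the identifier fallback above on a dialect with
# SUPPORTS_USER_DEFINED_TYPES; the "postgres" dialect and the type name
# my_schema.my_type are assumptions for illustration only.
#
#   import sqlglot
#   from sqlglot import exp
#
#   dt = sqlglot.parse_one(
#       "SELECT CAST(x AS my_schema.my_type)", read="postgres"
#   ).find(exp.DataType)
#   # dt comes from the exp.DataType.build(type_name, udt=True) call above,
#   # so the unknown type name is kept verbatim rather than being rejected
#   # as a type, and dt.sql(dialect="postgres") should round-trip it
#   # unchanged.
# ---------------------------------------------------------------------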
5168 if type_token == TokenType.PSEUDO_TYPE: 5169 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5170 5171 if type_token == TokenType.OBJECT_IDENTIFIER: 5172 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5173 5174 # https://materialize.com/docs/sql/types/map/ 5175 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5176 key_type = self._parse_types( 5177 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5178 ) 5179 if not self._match(TokenType.FARROW): 5180 self._retreat(index) 5181 return None 5182 5183 value_type = self._parse_types( 5184 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5185 ) 5186 if not self._match(TokenType.R_BRACKET): 5187 self._retreat(index) 5188 return None 5189 5190 return exp.DataType( 5191 this=exp.DataType.Type.MAP, 5192 expressions=[key_type, value_type], 5193 nested=True, 5194 prefix=prefix, 5195 ) 5196 5197 nested = type_token in self.NESTED_TYPE_TOKENS 5198 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5199 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5200 expressions = None 5201 maybe_func = False 5202 5203 if self._match(TokenType.L_PAREN): 5204 if is_struct: 5205 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5206 elif nested: 5207 expressions = self._parse_csv( 5208 lambda: self._parse_types( 5209 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5210 ) 5211 ) 5212 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5213 this = expressions[0] 5214 this.set("nullable", True) 5215 self._match_r_paren() 5216 return this 5217 elif type_token in self.ENUM_TYPE_TOKENS: 5218 expressions = self._parse_csv(self._parse_equality) 5219 elif is_aggregate: 5220 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5221 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5222 ) 5223 if not func_or_ident: 5224 return None 5225 expressions = [func_or_ident] 5226 if self._match(TokenType.COMMA): 5227 expressions.extend( 5228 self._parse_csv( 5229 lambda: self._parse_types( 5230 check_func=check_func, 5231 schema=schema, 5232 allow_identifiers=allow_identifiers, 5233 ) 5234 ) 5235 ) 5236 else: 5237 expressions = self._parse_csv(self._parse_type_size) 5238 5239 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5240 if type_token == TokenType.VECTOR and len(expressions) == 2: 5241 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5242 5243 if not expressions or not self._match(TokenType.R_PAREN): 5244 self._retreat(index) 5245 return None 5246 5247 maybe_func = True 5248 5249 values: t.Optional[t.List[exp.Expression]] = None 5250 5251 if nested and self._match(TokenType.LT): 5252 if is_struct: 5253 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5254 else: 5255 expressions = self._parse_csv( 5256 lambda: self._parse_types( 5257 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5258 ) 5259 ) 5260 5261 if not self._match(TokenType.GT): 5262 self.raise_error("Expecting >") 5263 5264 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5265 values = self._parse_csv(self._parse_assignment) 5266 if not values and is_struct: 5267 values = None 5268 self._retreat(self._index - 1) 5269 else: 5270 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5271 5272 if type_token in self.TIMESTAMPS: 5273 if self._match_text_seq("WITH", "TIME", 
"ZONE"): 5274 maybe_func = False 5275 tz_type = ( 5276 exp.DataType.Type.TIMETZ 5277 if type_token in self.TIMES 5278 else exp.DataType.Type.TIMESTAMPTZ 5279 ) 5280 this = exp.DataType(this=tz_type, expressions=expressions) 5281 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5282 maybe_func = False 5283 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5284 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5285 maybe_func = False 5286 elif type_token == TokenType.INTERVAL: 5287 unit = self._parse_var(upper=True) 5288 if unit: 5289 if self._match_text_seq("TO"): 5290 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5291 5292 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5293 else: 5294 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5295 elif type_token == TokenType.VOID: 5296 this = exp.DataType(this=exp.DataType.Type.NULL) 5297 5298 if maybe_func and check_func: 5299 index2 = self._index 5300 peek = self._parse_string() 5301 5302 if not peek: 5303 self._retreat(index) 5304 return None 5305 5306 self._retreat(index2) 5307 5308 if not this: 5309 if self._match_text_seq("UNSIGNED"): 5310 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5311 if not unsigned_type_token: 5312 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5313 5314 type_token = unsigned_type_token or type_token 5315 5316 this = exp.DataType( 5317 this=exp.DataType.Type[type_token.value], 5318 expressions=expressions, 5319 nested=nested, 5320 prefix=prefix, 5321 ) 5322 5323 # Empty arrays/structs are allowed 5324 if values is not None: 5325 cls = exp.Struct if is_struct else exp.Array 5326 this = exp.cast(cls(expressions=values), this, copy=False) 5327 5328 elif expressions: 5329 this.set("expressions", expressions) 5330 5331 # https://materialize.com/docs/sql/types/list/#type-name 5332 while self._match(TokenType.LIST): 5333 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5334 5335 index = self._index 5336 5337 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5338 matched_array = self._match(TokenType.ARRAY) 5339 5340 while self._curr: 5341 datatype_token = self._prev.token_type 5342 matched_l_bracket = self._match(TokenType.L_BRACKET) 5343 5344 if (not matched_l_bracket and not matched_array) or ( 5345 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5346 ): 5347 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5348 # not to be confused with the fixed size array parsing 5349 break 5350 5351 matched_array = False 5352 values = self._parse_csv(self._parse_assignment) or None 5353 if ( 5354 values 5355 and not schema 5356 and ( 5357 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5358 ) 5359 ): 5360 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB
5361 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
5362 self._retreat(index)
5363 break
5364
5365 this = exp.DataType(
5366 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
5367 )
5368 self._match(TokenType.R_BRACKET)
5369
5370 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
5371 converter = self.TYPE_CONVERTERS.get(this.this)
5372 if converter:
5373 this = converter(t.cast(exp.DataType, this))
5374
5375 return this
5376
5377 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
5378 index = self._index
5379
5380 if (
5381 self._curr
5382 and self._next
5383 and self._curr.token_type in self.TYPE_TOKENS
5384 and self._next.token_type in self.TYPE_TOKENS
5385 ):
5386 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
5387 # type token. Without this, the list will be parsed as a type and we'll eventually crash
5388 this = self._parse_id_var()
5389 else:
5390 this = (
5391 self._parse_type(parse_interval=False, fallback_to_identifier=True)
5392 or self._parse_id_var()
5393 )
5394
5395 self._match(TokenType.COLON)
5396
5397 if (
5398 type_required
5399 and not isinstance(this, exp.DataType)
5400 and not self._match_set(self.TYPE_TOKENS, advance=False)
5401 ):
5402 self._retreat(index)
5403 return self._parse_types()
5404
5405 return self._parse_column_def(this)
5406
5407 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
5408 if not self._match_text_seq("AT", "TIME", "ZONE"):
5409 return this
5410 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
5411
5412 def _parse_column(self) -> t.Optional[exp.Expression]:
5413 this = self._parse_column_reference()
5414 column = self._parse_column_ops(this) if this else self._parse_bracket(this)
5415
5416 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
5417 column.set("join_mark", self._match(TokenType.JOIN_MARKER))
5418
5419 return column
5420
5421 def _parse_column_reference(self) -> t.Optional[exp.Expression]:
5422 this = self._parse_field()
5423 if (
5424 not this
5425 and self._match(TokenType.VALUES, advance=False)
5426 and self.VALUES_FOLLOWED_BY_PAREN
5427 and (not self._next or self._next.token_type != TokenType.L_PAREN)
5428 ):
5429 this = self._parse_id_var()
5430
5431 if isinstance(this, exp.Identifier):
5432 # We bubble up comments from the Identifier to the Column
5433 this = self.expression(exp.Column, comments=this.pop_comments(), this=this)
5434
5435 return this
5436
5437 def _parse_colon_as_variant_extract(
5438 self, this: t.Optional[exp.Expression]
5439 ) -> t.Optional[exp.Expression]:
5440 casts = []
5441 json_path = []
5442 escape = None
5443
5444 while self._match(TokenType.COLON):
5445 start_index = self._index
5446
5447 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_token=True
5448 path = self._parse_column_ops(
5449 self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
5450 )
5451
5452 # The cast :: operator has a lower precedence than the extraction operator :, so
5453 # we rearrange the AST appropriately to avoid casting the JSON path
5454 while isinstance(path, exp.Cast):
5455 casts.append(path.to)
5456 path = path.this
5457
5458 if casts:
5459 dcolon_offset = next(
5460 i
5461 for i, t in enumerate(self._tokens[start_index:])
5462 if t.token_type == TokenType.DCOLON
5463 ) 5464 end_token = self._tokens[start_index + dcolon_offset - 1] 5465 else: 5466 end_token = self._prev 5467 5468 if path: 5469 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5470 # it'll roundtrip to a string literal in GET_PATH 5471 if isinstance(path, exp.Identifier) and path.quoted: 5472 escape = True 5473 5474 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5475 5476 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5477 # Databricks transforms it back to the colon/dot notation 5478 if json_path: 5479 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5480 5481 if json_path_expr: 5482 json_path_expr.set("escape", escape) 5483 5484 this = self.expression( 5485 exp.JSONExtract, 5486 this=this, 5487 expression=json_path_expr, 5488 variant_extract=True, 5489 ) 5490 5491 while casts: 5492 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5493 5494 return this 5495 5496 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5497 return self._parse_types() 5498 5499 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5500 this = self._parse_bracket(this) 5501 5502 while self._match_set(self.COLUMN_OPERATORS): 5503 op_token = self._prev.token_type 5504 op = self.COLUMN_OPERATORS.get(op_token) 5505 5506 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5507 field = self._parse_dcolon() 5508 if not field: 5509 self.raise_error("Expected type") 5510 elif op and self._curr: 5511 field = self._parse_column_reference() or self._parse_bracket() 5512 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5513 field = self._parse_column_ops(field) 5514 else: 5515 field = self._parse_field(any_token=True, anonymous_func=True) 5516 5517 if isinstance(field, (exp.Func, exp.Window)) and this: 5518 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5519 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5520 this = exp.replace_tree( 5521 this, 5522 lambda n: ( 5523 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5524 if n.table 5525 else n.this 5526 ) 5527 if isinstance(n, exp.Column) 5528 else n, 5529 ) 5530 5531 if op: 5532 this = op(self, this, field) 5533 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5534 this = self.expression( 5535 exp.Column, 5536 comments=this.comments, 5537 this=field, 5538 table=this.this, 5539 db=this.args.get("table"), 5540 catalog=this.args.get("db"), 5541 ) 5542 elif isinstance(field, exp.Window): 5543 # Move the exp.Dot's to the window's function 5544 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5545 field.set("this", window_func) 5546 this = field 5547 else: 5548 this = self.expression(exp.Dot, this=this, expression=field) 5549 5550 if field and field.comments: 5551 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5552 5553 this = self._parse_bracket(this) 5554 5555 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5556 5557 def _parse_primary(self) -> t.Optional[exp.Expression]: 5558 if self._match_set(self.PRIMARY_PARSERS): 5559 token_type = self._prev.token_type 5560 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5561 5562 if token_type == TokenType.STRING: 5563 expressions = [primary] 5564 while self._match(TokenType.STRING): 5565 expressions.append(exp.Literal.string(self._prev.text)) 5566 5567 if len(expressions) > 1: 5568 return self.expression(exp.Concat, expressions=expressions) 5569 5570 return primary 5571 5572 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5573 return exp.Literal.number(f"0.{self._prev.text}") 5574 5575 if self._match(TokenType.L_PAREN): 5576 comments = self._prev_comments 5577 query = self._parse_select() 5578 5579 if query: 5580 expressions = [query] 5581 else: 5582 expressions = self._parse_expressions() 5583 5584 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5585 5586 if not this and self._match(TokenType.R_PAREN, advance=False): 5587 this = self.expression(exp.Tuple) 5588 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5589 this = self._parse_subquery(this=this, parse_alias=False) 5590 elif isinstance(this, exp.Subquery): 5591 this = self._parse_subquery( 5592 this=self._parse_set_operations(this), parse_alias=False 5593 ) 5594 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5595 this = self.expression(exp.Tuple, expressions=expressions) 5596 else: 5597 this = self.expression(exp.Paren, this=this) 5598 5599 if this: 5600 this.add_comments(comments) 5601 5602 self._match_r_paren(expression=this) 5603 return this 5604 5605 return None 5606 5607 def _parse_field( 5608 self, 5609 any_token: bool = False, 5610 tokens: t.Optional[t.Collection[TokenType]] = None, 5611 anonymous_func: bool = False, 5612 ) -> t.Optional[exp.Expression]: 5613 if anonymous_func: 5614 field = ( 5615 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5616 or self._parse_primary() 5617 ) 5618 else: 5619 field = self._parse_primary() or self._parse_function( 5620 anonymous=anonymous_func, any_token=any_token 5621 ) 5622 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5623 5624 def _parse_function( 5625 self, 5626 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5627 anonymous: bool = False, 5628 optional_parens: 
bool = True, 5629 any_token: bool = False, 5630 ) -> t.Optional[exp.Expression]: 5631 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5632 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5633 fn_syntax = False 5634 if ( 5635 self._match(TokenType.L_BRACE, advance=False) 5636 and self._next 5637 and self._next.text.upper() == "FN" 5638 ): 5639 self._advance(2) 5640 fn_syntax = True 5641 5642 func = self._parse_function_call( 5643 functions=functions, 5644 anonymous=anonymous, 5645 optional_parens=optional_parens, 5646 any_token=any_token, 5647 ) 5648 5649 if fn_syntax: 5650 self._match(TokenType.R_BRACE) 5651 5652 return func 5653 5654 def _parse_function_call( 5655 self, 5656 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5657 anonymous: bool = False, 5658 optional_parens: bool = True, 5659 any_token: bool = False, 5660 ) -> t.Optional[exp.Expression]: 5661 if not self._curr: 5662 return None 5663 5664 comments = self._curr.comments 5665 token = self._curr 5666 token_type = self._curr.token_type 5667 this = self._curr.text 5668 upper = this.upper() 5669 5670 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5671 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5672 self._advance() 5673 return self._parse_window(parser(self)) 5674 5675 if not self._next or self._next.token_type != TokenType.L_PAREN: 5676 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5677 self._advance() 5678 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5679 5680 return None 5681 5682 if any_token: 5683 if token_type in self.RESERVED_TOKENS: 5684 return None 5685 elif token_type not in self.FUNC_TOKENS: 5686 return None 5687 5688 self._advance(2) 5689 5690 parser = self.FUNCTION_PARSERS.get(upper) 5691 if parser and not anonymous: 5692 this = parser(self) 5693 else: 5694 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5695 5696 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5697 this = self.expression( 5698 subquery_predicate, comments=comments, this=self._parse_select() 5699 ) 5700 self._match_r_paren() 5701 return this 5702 5703 if functions is None: 5704 functions = self.FUNCTIONS 5705 5706 function = functions.get(upper) 5707 known_function = function and not anonymous 5708 5709 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5710 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5711 5712 post_func_comments = self._curr and self._curr.comments 5713 if known_function and post_func_comments: 5714 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5715 # call we'll construct it as exp.Anonymous, even if it's "known" 5716 if any( 5717 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5718 for comment in post_func_comments 5719 ): 5720 known_function = False 5721 5722 if alias and known_function: 5723 args = self._kv_to_prop_eq(args) 5724 5725 if known_function: 5726 func_builder = t.cast(t.Callable, function) 5727 5728 if "dialect" in func_builder.__code__.co_varnames: 5729 func = func_builder(args, dialect=self.dialect) 5730 else: 5731 func = func_builder(args) 5732 5733 func = self.validate_expression(func, args) 5734 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5735 func.meta["name"] = this 5736 5737 this = func 5738 else: 5739 if token_type == TokenType.IDENTIFIER: 5740 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5741 5742 this = 
self.expression(exp.Anonymous, this=this, expressions=args) 5743 this = this.update_positions(token) 5744 5745 if isinstance(this, exp.Expression): 5746 this.add_comments(comments) 5747 5748 self._match_r_paren(this) 5749 return self._parse_window(this) 5750 5751 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5752 return expression 5753 5754 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5755 transformed = [] 5756 5757 for index, e in enumerate(expressions): 5758 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5759 if isinstance(e, exp.Alias): 5760 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5761 5762 if not isinstance(e, exp.PropertyEQ): 5763 e = self.expression( 5764 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5765 ) 5766 5767 if isinstance(e.this, exp.Column): 5768 e.this.replace(e.this.this) 5769 else: 5770 e = self._to_prop_eq(e, index) 5771 5772 transformed.append(e) 5773 5774 return transformed 5775 5776 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5777 return self._parse_statement() 5778 5779 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5780 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5781 5782 def _parse_user_defined_function( 5783 self, kind: t.Optional[TokenType] = None 5784 ) -> t.Optional[exp.Expression]: 5785 this = self._parse_table_parts(schema=True) 5786 5787 if not self._match(TokenType.L_PAREN): 5788 return this 5789 5790 expressions = self._parse_csv(self._parse_function_parameter) 5791 self._match_r_paren() 5792 return self.expression( 5793 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5794 ) 5795 5796 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5797 literal = self._parse_primary() 5798 if literal: 5799 return self.expression(exp.Introducer, this=token.text, expression=literal) 5800 5801 return self._identifier_expression(token) 5802 5803 def _parse_session_parameter(self) -> exp.SessionParameter: 5804 kind = None 5805 this = self._parse_id_var() or self._parse_primary() 5806 5807 if this and self._match(TokenType.DOT): 5808 kind = this.name 5809 this = self._parse_var() or self._parse_primary() 5810 5811 return self.expression(exp.SessionParameter, this=this, kind=kind) 5812 5813 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5814 return self._parse_id_var() 5815 5816 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5817 index = self._index 5818 5819 if self._match(TokenType.L_PAREN): 5820 expressions = t.cast( 5821 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5822 ) 5823 5824 if not self._match(TokenType.R_PAREN): 5825 self._retreat(index) 5826 else: 5827 expressions = [self._parse_lambda_arg()] 5828 5829 if self._match_set(self.LAMBDAS): 5830 return self.LAMBDAS[self._prev.token_type](self, expressions) 5831 5832 self._retreat(index) 5833 5834 this: t.Optional[exp.Expression] 5835 5836 if self._match(TokenType.DISTINCT): 5837 this = self.expression( 5838 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5839 ) 5840 else: 5841 this = self._parse_select_or_expression(alias=alias) 5842 5843 return self._parse_limit( 5844 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5845 ) 5846 5847 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> 
t.Optional[exp.Expression]: 5848 index = self._index 5849 if not self._match(TokenType.L_PAREN): 5850 return this 5851 5852 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5853 # expr can be of both types 5854 if self._match_set(self.SELECT_START_TOKENS): 5855 self._retreat(index) 5856 return this 5857 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5858 self._match_r_paren() 5859 return self.expression(exp.Schema, this=this, expressions=args) 5860 5861 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5862 return self._parse_column_def(self._parse_field(any_token=True)) 5863 5864 def _parse_column_def( 5865 self, this: t.Optional[exp.Expression], computed_column: bool = True 5866 ) -> t.Optional[exp.Expression]: 5867 # column defs are not really columns, they're identifiers 5868 if isinstance(this, exp.Column): 5869 this = this.this 5870 5871 if not computed_column: 5872 self._match(TokenType.ALIAS) 5873 5874 kind = self._parse_types(schema=True) 5875 5876 if self._match_text_seq("FOR", "ORDINALITY"): 5877 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5878 5879 constraints: t.List[exp.Expression] = [] 5880 5881 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5882 ("ALIAS", "MATERIALIZED") 5883 ): 5884 persisted = self._prev.text.upper() == "MATERIALIZED" 5885 constraint_kind = exp.ComputedColumnConstraint( 5886 this=self._parse_assignment(), 5887 persisted=persisted or self._match_text_seq("PERSISTED"), 5888 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5889 ) 5890 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5891 elif ( 5892 kind 5893 and self._match(TokenType.ALIAS, advance=False) 5894 and ( 5895 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5896 or (self._next and self._next.token_type == TokenType.L_PAREN) 5897 ) 5898 ): 5899 self._advance() 5900 constraints.append( 5901 self.expression( 5902 exp.ColumnConstraint, 5903 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5904 ) 5905 ) 5906 5907 while True: 5908 constraint = self._parse_column_constraint() 5909 if not constraint: 5910 break 5911 constraints.append(constraint) 5912 5913 if not kind and not constraints: 5914 return this 5915 5916 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5917 5918 def _parse_auto_increment( 5919 self, 5920 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5921 start = None 5922 increment = None 5923 5924 if self._match(TokenType.L_PAREN, advance=False): 5925 args = self._parse_wrapped_csv(self._parse_bitwise) 5926 start = seq_get(args, 0) 5927 increment = seq_get(args, 1) 5928 elif self._match_text_seq("START"): 5929 start = self._parse_bitwise() 5930 self._match_text_seq("INCREMENT") 5931 increment = self._parse_bitwise() 5932 5933 if start and increment: 5934 return exp.GeneratedAsIdentityColumnConstraint( 5935 start=start, increment=increment, this=False 5936 ) 5937 5938 return exp.AutoIncrementColumnConstraint() 5939 5940 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5941 if not self._match_text_seq("REFRESH"): 5942 self._retreat(self._index - 1) 5943 return None 5944 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5945 5946 def _parse_compress(self) -> exp.CompressColumnConstraint: 5947 if self._match(TokenType.L_PAREN, advance=False): 5948 return self.expression( 5949 
exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5950 ) 5951 5952 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5953 5954 def _parse_generated_as_identity( 5955 self, 5956 ) -> ( 5957 exp.GeneratedAsIdentityColumnConstraint 5958 | exp.ComputedColumnConstraint 5959 | exp.GeneratedAsRowColumnConstraint 5960 ): 5961 if self._match_text_seq("BY", "DEFAULT"): 5962 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5963 this = self.expression( 5964 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5965 ) 5966 else: 5967 self._match_text_seq("ALWAYS") 5968 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5969 5970 self._match(TokenType.ALIAS) 5971 5972 if self._match_text_seq("ROW"): 5973 start = self._match_text_seq("START") 5974 if not start: 5975 self._match(TokenType.END) 5976 hidden = self._match_text_seq("HIDDEN") 5977 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5978 5979 identity = self._match_text_seq("IDENTITY") 5980 5981 if self._match(TokenType.L_PAREN): 5982 if self._match(TokenType.START_WITH): 5983 this.set("start", self._parse_bitwise()) 5984 if self._match_text_seq("INCREMENT", "BY"): 5985 this.set("increment", self._parse_bitwise()) 5986 if self._match_text_seq("MINVALUE"): 5987 this.set("minvalue", self._parse_bitwise()) 5988 if self._match_text_seq("MAXVALUE"): 5989 this.set("maxvalue", self._parse_bitwise()) 5990 5991 if self._match_text_seq("CYCLE"): 5992 this.set("cycle", True) 5993 elif self._match_text_seq("NO", "CYCLE"): 5994 this.set("cycle", False) 5995 5996 if not identity: 5997 this.set("expression", self._parse_range()) 5998 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5999 args = self._parse_csv(self._parse_bitwise) 6000 this.set("start", seq_get(args, 0)) 6001 this.set("increment", seq_get(args, 1)) 6002 6003 self._match_r_paren() 6004 6005 return this 6006 6007 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6008 self._match_text_seq("LENGTH") 6009 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6010 6011 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6012 if self._match_text_seq("NULL"): 6013 return self.expression(exp.NotNullColumnConstraint) 6014 if self._match_text_seq("CASESPECIFIC"): 6015 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6016 if self._match_text_seq("FOR", "REPLICATION"): 6017 return self.expression(exp.NotForReplicationColumnConstraint) 6018 6019 # Unconsume the `NOT` token 6020 self._retreat(self._index - 1) 6021 return None 6022 6023 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6024 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6025 6026 procedure_option_follows = ( 6027 self._match(TokenType.WITH, advance=False) 6028 and self._next 6029 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6030 ) 6031 6032 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6033 return self.expression( 6034 exp.ColumnConstraint, 6035 this=this, 6036 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6037 ) 6038 6039 return this 6040 6041 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6042 if not self._match(TokenType.CONSTRAINT): 6043 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6044 6045 return self.expression( 6046 exp.Constraint, 6047 
this=self._parse_id_var(), 6048 expressions=self._parse_unnamed_constraints(), 6049 ) 6050 6051 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6052 constraints = [] 6053 while True: 6054 constraint = self._parse_unnamed_constraint() or self._parse_function() 6055 if not constraint: 6056 break 6057 constraints.append(constraint) 6058 6059 return constraints 6060 6061 def _parse_unnamed_constraint( 6062 self, constraints: t.Optional[t.Collection[str]] = None 6063 ) -> t.Optional[exp.Expression]: 6064 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6065 constraints or self.CONSTRAINT_PARSERS 6066 ): 6067 return None 6068 6069 constraint = self._prev.text.upper() 6070 if constraint not in self.CONSTRAINT_PARSERS: 6071 self.raise_error(f"No parser found for schema constraint {constraint}.") 6072 6073 return self.CONSTRAINT_PARSERS[constraint](self) 6074 6075 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6076 return self._parse_id_var(any_token=False) 6077 6078 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6079 self._match_text_seq("KEY") 6080 return self.expression( 6081 exp.UniqueColumnConstraint, 6082 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6083 this=self._parse_schema(self._parse_unique_key()), 6084 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6085 on_conflict=self._parse_on_conflict(), 6086 options=self._parse_key_constraint_options(), 6087 ) 6088 6089 def _parse_key_constraint_options(self) -> t.List[str]: 6090 options = [] 6091 while True: 6092 if not self._curr: 6093 break 6094 6095 if self._match(TokenType.ON): 6096 action = None 6097 on = self._advance_any() and self._prev.text 6098 6099 if self._match_text_seq("NO", "ACTION"): 6100 action = "NO ACTION" 6101 elif self._match_text_seq("CASCADE"): 6102 action = "CASCADE" 6103 elif self._match_text_seq("RESTRICT"): 6104 action = "RESTRICT" 6105 elif self._match_pair(TokenType.SET, TokenType.NULL): 6106 action = "SET NULL" 6107 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6108 action = "SET DEFAULT" 6109 else: 6110 self.raise_error("Invalid key constraint") 6111 6112 options.append(f"ON {on} {action}") 6113 else: 6114 var = self._parse_var_from_options( 6115 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6116 ) 6117 if not var: 6118 break 6119 options.append(var.name) 6120 6121 return options 6122 6123 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6124 if match and not self._match(TokenType.REFERENCES): 6125 return None 6126 6127 expressions = None 6128 this = self._parse_table(schema=True) 6129 options = self._parse_key_constraint_options() 6130 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6131 6132 def _parse_foreign_key(self) -> exp.ForeignKey: 6133 expressions = ( 6134 self._parse_wrapped_id_vars() 6135 if not self._match(TokenType.REFERENCES, advance=False) 6136 else None 6137 ) 6138 reference = self._parse_references() 6139 on_options = {} 6140 6141 while self._match(TokenType.ON): 6142 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6143 self.raise_error("Expected DELETE or UPDATE") 6144 6145 kind = self._prev.text.lower() 6146 6147 if self._match_text_seq("NO", "ACTION"): 6148 action = "NO ACTION" 6149 elif self._match(TokenType.SET): 6150 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6151 action = "SET " + self._prev.text.upper() 6152 else: 6153 self._advance() 6154 action = 
self._prev.text.upper() 6155 6156 on_options[kind] = action 6157 6158 return self.expression( 6159 exp.ForeignKey, 6160 expressions=expressions, 6161 reference=reference, 6162 options=self._parse_key_constraint_options(), 6163 **on_options, # type: ignore 6164 ) 6165 6166 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6167 return self._parse_ordered() or self._parse_field() 6168 6169 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6170 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6171 self._retreat(self._index - 1) 6172 return None 6173 6174 id_vars = self._parse_wrapped_id_vars() 6175 return self.expression( 6176 exp.PeriodForSystemTimeConstraint, 6177 this=seq_get(id_vars, 0), 6178 expression=seq_get(id_vars, 1), 6179 ) 6180 6181 def _parse_primary_key( 6182 self, wrapped_optional: bool = False, in_props: bool = False 6183 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6184 desc = ( 6185 self._match_set((TokenType.ASC, TokenType.DESC)) 6186 and self._prev.token_type == TokenType.DESC 6187 ) 6188 6189 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6190 return self.expression( 6191 exp.PrimaryKeyColumnConstraint, 6192 desc=desc, 6193 options=self._parse_key_constraint_options(), 6194 ) 6195 6196 expressions = self._parse_wrapped_csv( 6197 self._parse_primary_key_part, optional=wrapped_optional 6198 ) 6199 options = self._parse_key_constraint_options() 6200 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6201 6202 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6203 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6204 6205 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6206 """ 6207 Parses a datetime column in ODBC format. We parse the column into the corresponding 6208 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6209 same as we did for `DATE('yyyy-mm-dd')`. 
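For example, `{d '2024-01-01'}` produces the same AST as `DATE('2024-01-01')`;
the time and timestamp escapes described in the reference below (`{t ...}`,
`{ts ...}`) are handled the same way, through their ODBC_DATETIME_LITERALS entries.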
6210 6211 Reference: 6212 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6213 """ 6214 self._match(TokenType.VAR) 6215 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6216 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6217 if not self._match(TokenType.R_BRACE): 6218 self.raise_error("Expected }") 6219 return expression 6220 6221 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6222 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6223 return this 6224 6225 bracket_kind = self._prev.token_type 6226 if ( 6227 bracket_kind == TokenType.L_BRACE 6228 and self._curr 6229 and self._curr.token_type == TokenType.VAR 6230 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6231 ): 6232 return self._parse_odbc_datetime_literal() 6233 6234 expressions = self._parse_csv( 6235 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6236 ) 6237 6238 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6239 self.raise_error("Expected ]") 6240 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6241 self.raise_error("Expected }") 6242 6243 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6244 if bracket_kind == TokenType.L_BRACE: 6245 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6246 elif not this: 6247 this = build_array_constructor( 6248 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6249 ) 6250 else: 6251 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6252 if constructor_type: 6253 return build_array_constructor( 6254 constructor_type, 6255 args=expressions, 6256 bracket_kind=bracket_kind, 6257 dialect=self.dialect, 6258 ) 6259 6260 expressions = apply_index_offset( 6261 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6262 ) 6263 this = self.expression( 6264 exp.Bracket, 6265 this=this, 6266 expressions=expressions, 6267 comments=this.pop_comments(), 6268 ) 6269 6270 self._add_comments(this) 6271 return self._parse_bracket(this) 6272 6273 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6274 if self._match(TokenType.COLON): 6275 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6276 return this 6277 6278 def _parse_case(self) -> t.Optional[exp.Expression]: 6279 ifs = [] 6280 default = None 6281 6282 comments = self._prev_comments 6283 expression = self._parse_assignment() 6284 6285 while self._match(TokenType.WHEN): 6286 this = self._parse_assignment() 6287 self._match(TokenType.THEN) 6288 then = self._parse_assignment() 6289 ifs.append(self.expression(exp.If, this=this, true=then)) 6290 6291 if self._match(TokenType.ELSE): 6292 default = self._parse_assignment() 6293 6294 if not self._match(TokenType.END): 6295 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6296 default = exp.column("interval") 6297 else: 6298 self.raise_error("Expected END after CASE", self._prev) 6299 6300 return self.expression( 6301 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6302 ) 6303 6304 def _parse_if(self) -> t.Optional[exp.Expression]: 6305 if self._match(TokenType.L_PAREN): 6306 args = self._parse_csv( 6307 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6308 ) 6309 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6310 self._match_r_paren() 6311 else: 6312 index = self._index - 1 6313 6314 if self.NO_PAREN_IF_COMMANDS and index == 0: 6315 return self._parse_as_command(self._prev) 6316 6317 condition = self._parse_assignment() 6318 6319 if not condition: 6320 self._retreat(index) 6321 return None 6322 6323 self._match(TokenType.THEN) 6324 true = self._parse_assignment() 6325 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6326 self._match(TokenType.END) 6327 this = self.expression(exp.If, this=condition, true=true, false=false) 6328 6329 return this 6330 6331 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6332 if not self._match_text_seq("VALUE", "FOR"): 6333 self._retreat(self._index - 1) 6334 return None 6335 6336 return self.expression( 6337 exp.NextValueFor, 6338 this=self._parse_column(), 6339 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6340 ) 6341 6342 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6343 this = self._parse_function() or self._parse_var_or_string(upper=True) 6344 6345 if self._match(TokenType.FROM): 6346 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6347 6348 if not self._match(TokenType.COMMA): 6349 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6350 6351 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6352 6353 def _parse_gap_fill(self) -> exp.GapFill: 6354 self._match(TokenType.TABLE) 6355 this = self._parse_table() 6356 6357 self._match(TokenType.COMMA) 6358 args = [this, *self._parse_csv(self._parse_lambda)] 6359 6360 gap_fill = exp.GapFill.from_arg_list(args) 6361 return self.validate_expression(gap_fill, args) 6362 6363 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6364 this = self._parse_assignment() 6365 6366 if not self._match(TokenType.ALIAS): 6367 if self._match(TokenType.COMMA): 6368 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6369 6370 self.raise_error("Expected AS after CAST") 6371 6372 fmt = None 6373 to = self._parse_types() 6374 6375 default = self._match(TokenType.DEFAULT) 6376 if default: 6377 default = self._parse_bitwise() 6378 self._match_text_seq("ON", "CONVERSION", "ERROR") 6379 6380 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6381 fmt_string = self._parse_string() 6382 fmt = self._parse_at_time_zone(fmt_string) 6383 6384 if not to: 6385 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6386 if to.this in exp.DataType.TEMPORAL_TYPES: 6387 this = self.expression( 6388 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6389 this=this, 6390 format=exp.Literal.string( 6391 format_time( 6392 fmt_string.this if fmt_string else "", 6393 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6394 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6395 ) 6396 ), 6397 safe=safe, 6398 ) 6399 6400 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6401 this.set("zone", fmt.args["zone"]) 6402 return this 6403 elif not to: 6404 self.raise_error("Expected TYPE after CAST") 6405 elif isinstance(to, exp.Identifier): 6406 to = exp.DataType.build(to.name, udt=True) 6407 elif to.this == exp.DataType.Type.CHAR: 6408 if self._match(TokenType.CHARACTER_SET): 6409 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6410 6411 return self.expression( 6412 exp.Cast if strict else exp.TryCast, 6413 
this=this, 6414 to=to, 6415 format=fmt, 6416 safe=safe, 6417 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6418 default=default, 6419 ) 6420 6421 def _parse_string_agg(self) -> exp.GroupConcat: 6422 if self._match(TokenType.DISTINCT): 6423 args: t.List[t.Optional[exp.Expression]] = [ 6424 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6425 ] 6426 if self._match(TokenType.COMMA): 6427 args.extend(self._parse_csv(self._parse_assignment)) 6428 else: 6429 args = self._parse_csv(self._parse_assignment) # type: ignore 6430 6431 if self._match_text_seq("ON", "OVERFLOW"): 6432 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6433 if self._match_text_seq("ERROR"): 6434 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6435 else: 6436 self._match_text_seq("TRUNCATE") 6437 on_overflow = self.expression( 6438 exp.OverflowTruncateBehavior, 6439 this=self._parse_string(), 6440 with_count=( 6441 self._match_text_seq("WITH", "COUNT") 6442 or not self._match_text_seq("WITHOUT", "COUNT") 6443 ), 6444 ) 6445 else: 6446 on_overflow = None 6447 6448 index = self._index 6449 if not self._match(TokenType.R_PAREN) and args: 6450 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6451 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6452 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6453 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6454 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6455 6456 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6457 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6458 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
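# Illustrative example: LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) therefore becomes,
# roughly, GroupConcat(this=Order(this=x, expressions=[Ordered(y)]), separator=','),
# the same shape that MySQL's GROUP_CONCAT(x ORDER BY y SEPARATOR ',') produces.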
6459 if not self._match_text_seq("WITHIN", "GROUP"): 6460 self._retreat(index) 6461 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6462 6463 # The corresponding match_r_paren will be called in parse_function (caller) 6464 self._match_l_paren() 6465 6466 return self.expression( 6467 exp.GroupConcat, 6468 this=self._parse_order(this=seq_get(args, 0)), 6469 separator=seq_get(args, 1), 6470 on_overflow=on_overflow, 6471 ) 6472 6473 def _parse_convert( 6474 self, strict: bool, safe: t.Optional[bool] = None 6475 ) -> t.Optional[exp.Expression]: 6476 this = self._parse_bitwise() 6477 6478 if self._match(TokenType.USING): 6479 to: t.Optional[exp.Expression] = self.expression( 6480 exp.CharacterSet, this=self._parse_var() 6481 ) 6482 elif self._match(TokenType.COMMA): 6483 to = self._parse_types() 6484 else: 6485 to = None 6486 6487 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6488 6489 def _parse_xml_table(self) -> exp.XMLTable: 6490 namespaces = None 6491 passing = None 6492 columns = None 6493 6494 if self._match_text_seq("XMLNAMESPACES", "("): 6495 namespaces = self._parse_xml_namespace() 6496 self._match_text_seq(")", ",") 6497 6498 this = self._parse_string() 6499 6500 if self._match_text_seq("PASSING"): 6501 # The BY VALUE keywords are optional and are provided for semantic clarity 6502 self._match_text_seq("BY", "VALUE") 6503 passing = self._parse_csv(self._parse_column) 6504 6505 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6506 6507 if self._match_text_seq("COLUMNS"): 6508 columns = self._parse_csv(self._parse_field_def) 6509 6510 return self.expression( 6511 exp.XMLTable, 6512 this=this, 6513 namespaces=namespaces, 6514 passing=passing, 6515 columns=columns, 6516 by_ref=by_ref, 6517 ) 6518 6519 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6520 namespaces = [] 6521 6522 while True: 6523 if self._match(TokenType.DEFAULT): 6524 uri = self._parse_string() 6525 else: 6526 uri = self._parse_alias(self._parse_string()) 6527 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6528 if not self._match(TokenType.COMMA): 6529 break 6530 6531 return namespaces 6532 6533 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6534 """ 6535 There are generally two variants of the DECODE function: 6536 6537 - DECODE(bin, charset) 6538 - DECODE(expression, search, result [, search, result] ... [, default]) 6539 6540 The second variant will always be parsed into a CASE expression. Note that NULL 6541 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6542 instead of relying on pattern matching. 
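For example, DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed, roughly, into:

    CASE
        WHEN x = 1 THEN 'one'
        WHEN x IS NULL THEN 'none'
        ELSE 'other'
    END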
6543 """ 6544 args = self._parse_csv(self._parse_assignment) 6545 6546 if len(args) < 3: 6547 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6548 6549 expression, *expressions = args 6550 if not expression: 6551 return None 6552 6553 ifs = [] 6554 for search, result in zip(expressions[::2], expressions[1::2]): 6555 if not search or not result: 6556 return None 6557 6558 if isinstance(search, exp.Literal): 6559 ifs.append( 6560 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6561 ) 6562 elif isinstance(search, exp.Null): 6563 ifs.append( 6564 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6565 ) 6566 else: 6567 cond = exp.or_( 6568 exp.EQ(this=expression.copy(), expression=search), 6569 exp.and_( 6570 exp.Is(this=expression.copy(), expression=exp.Null()), 6571 exp.Is(this=search.copy(), expression=exp.Null()), 6572 copy=False, 6573 ), 6574 copy=False, 6575 ) 6576 ifs.append(exp.If(this=cond, true=result)) 6577 6578 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6579 6580 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6581 self._match_text_seq("KEY") 6582 key = self._parse_column() 6583 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6584 self._match_text_seq("VALUE") 6585 value = self._parse_bitwise() 6586 6587 if not key and not value: 6588 return None 6589 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6590 6591 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6592 if not this or not self._match_text_seq("FORMAT", "JSON"): 6593 return this 6594 6595 return self.expression(exp.FormatJson, this=this) 6596 6597 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6598 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6599 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6600 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6601 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6602 else: 6603 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6604 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6605 6606 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6607 6608 if not empty and not error and not null: 6609 return None 6610 6611 return self.expression( 6612 exp.OnCondition, 6613 empty=empty, 6614 error=error, 6615 null=null, 6616 ) 6617 6618 def _parse_on_handling( 6619 self, on: str, *values: str 6620 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6621 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6622 for value in values: 6623 if self._match_text_seq(value, "ON", on): 6624 return f"{value} ON {on}" 6625 6626 index = self._index 6627 if self._match(TokenType.DEFAULT): 6628 default_value = self._parse_bitwise() 6629 if self._match_text_seq("ON", on): 6630 return default_value 6631 6632 self._retreat(index) 6633 6634 return None 6635 6636 @t.overload 6637 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6638 6639 @t.overload 6640 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6641 6642 def _parse_json_object(self, agg=False): 6643 star = self._parse_star() 6644 expressions = ( 6645 [star] 6646 if star 6647 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6648 ) 6649 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6650 6651 unique_keys = None 6652 if self._match_text_seq("WITH", "UNIQUE"): 6653 unique_keys = True 6654 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6655 unique_keys = False 6656 6657 self._match_text_seq("KEYS") 6658 6659 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6660 self._parse_type() 6661 ) 6662 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6663 6664 return self.expression( 6665 exp.JSONObjectAgg if agg else exp.JSONObject, 6666 expressions=expressions, 6667 null_handling=null_handling, 6668 unique_keys=unique_keys, 6669 return_type=return_type, 6670 encoding=encoding, 6671 ) 6672 6673 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6674 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6675 if not self._match_text_seq("NESTED"): 6676 this = self._parse_id_var() 6677 kind = self._parse_types(allow_identifiers=False) 6678 nested = None 6679 else: 6680 this = None 6681 kind = None 6682 nested = True 6683 6684 path = self._match_text_seq("PATH") and self._parse_string() 6685 nested_schema = nested and self._parse_json_schema() 6686 6687 return self.expression( 6688 exp.JSONColumnDef, 6689 this=this, 6690 kind=kind, 6691 path=path, 6692 nested_schema=nested_schema, 6693 ) 6694 6695 def _parse_json_schema(self) -> exp.JSONSchema: 6696 self._match_text_seq("COLUMNS") 6697 return self.expression( 6698 exp.JSONSchema, 6699 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6700 ) 6701 6702 def _parse_json_table(self) -> exp.JSONTable: 6703 this = self._parse_format_json(self._parse_bitwise()) 6704 path = self._match(TokenType.COMMA) and self._parse_string() 6705 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6706 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6707 schema = self._parse_json_schema() 6708 6709 return exp.JSONTable( 6710 this=this, 6711 schema=schema, 6712 path=path, 6713 error_handling=error_handling, 6714 empty_handling=empty_handling, 6715 ) 6716 6717 def _parse_match_against(self) -> exp.MatchAgainst: 6718 expressions = self._parse_csv(self._parse_column) 6719 6720 self._match_text_seq(")", "AGAINST", "(") 6721 6722 this = self._parse_string() 6723 6724 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6725 modifier = "IN NATURAL LANGUAGE MODE" 6726 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6727 modifier = f"{modifier} WITH QUERY EXPANSION" 6728 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6729 modifier = "IN BOOLEAN MODE" 6730 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6731 modifier = "WITH QUERY EXPANSION" 6732 else: 6733 modifier = None 6734 6735 return self.expression( 6736 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6737 ) 6738 6739 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6740 def _parse_open_json(self) -> exp.OpenJSON: 6741 this = self._parse_bitwise() 6742 path = self._match(TokenType.COMMA) and self._parse_string() 6743 6744 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6745 this = self._parse_field(any_token=True) 6746 kind = self._parse_types() 6747 path = 
self._parse_string() 6748 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6749 6750 return self.expression( 6751 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6752 ) 6753 6754 expressions = None 6755 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6756 self._match_l_paren() 6757 expressions = self._parse_csv(_parse_open_json_column_def) 6758 6759 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6760 6761 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6762 args = self._parse_csv(self._parse_bitwise) 6763 6764 if self._match(TokenType.IN): 6765 return self.expression( 6766 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6767 ) 6768 6769 if haystack_first: 6770 haystack = seq_get(args, 0) 6771 needle = seq_get(args, 1) 6772 else: 6773 haystack = seq_get(args, 1) 6774 needle = seq_get(args, 0) 6775 6776 return self.expression( 6777 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6778 ) 6779 6780 def _parse_predict(self) -> exp.Predict: 6781 self._match_text_seq("MODEL") 6782 this = self._parse_table() 6783 6784 self._match(TokenType.COMMA) 6785 self._match_text_seq("TABLE") 6786 6787 return self.expression( 6788 exp.Predict, 6789 this=this, 6790 expression=self._parse_table(), 6791 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6792 ) 6793 6794 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6795 args = self._parse_csv(self._parse_table) 6796 return exp.JoinHint(this=func_name.upper(), expressions=args) 6797 6798 def _parse_substring(self) -> exp.Substring: 6799 # Postgres supports the form: substring(string [from int] [for int]) 6800 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6801 6802 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6803 6804 if self._match(TokenType.FROM): 6805 args.append(self._parse_bitwise()) 6806 if self._match(TokenType.FOR): 6807 if len(args) == 1: 6808 args.append(exp.Literal.number(1)) 6809 args.append(self._parse_bitwise()) 6810 6811 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6812 6813 def _parse_trim(self) -> exp.Trim: 6814 # https://www.w3resource.com/sql/character-functions/trim.php 6815 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6816 6817 position = None 6818 collation = None 6819 expression = None 6820 6821 if self._match_texts(self.TRIM_TYPES): 6822 position = self._prev.text.upper() 6823 6824 this = self._parse_bitwise() 6825 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6826 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6827 expression = self._parse_bitwise() 6828 6829 if invert_order: 6830 this, expression = expression, this 6831 6832 if self._match(TokenType.COLLATE): 6833 collation = self._parse_bitwise() 6834 6835 return self.expression( 6836 exp.Trim, this=this, position=position, expression=expression, collation=collation 6837 ) 6838 6839 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6840 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6841 6842 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6843 return self._parse_window(self._parse_id_var(), alias=True) 6844 6845 def _parse_respect_or_ignore_nulls( 6846 self, this: t.Optional[exp.Expression] 6847 ) -> t.Optional[exp.Expression]: 6848 if self._match_text_seq("IGNORE", "NULLS"): 
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
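    # Illustrative usage sketch (not part of the sqlglot source; names are made
    # up): when IGNORE/RESPECT NULLS trails the function call, _parse_window
    # below rewraps the aggregate, so FIRST_VALUE(x) IGNORE NULLS OVER (...)
    # carries an exp.IgnoreNulls wrapper under the window:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one(
    #   ...     "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t",
    #   ...     read="snowflake",
    #   ... )
    #   >>> ast.find(exp.IgnoreNulls) is not None
    #   True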
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
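    # Illustrative usage sketch (not part of the sqlglot source; names are made
    # up): a full OVER clause flows through _parse_window/_parse_window_spec
    # above into exp.Window and exp.WindowSpec nodes:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sql = (
    #   ...     "SELECT SUM(x) OVER (PARTITION BY g ORDER BY d "
    #   ...     "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #   ... )
    #   >>> spec = sqlglot.parse_one(sql).find(exp.WindowSpec)
    #   >>> spec.args["kind"], spec.args["start"]
    #   ('ROWS', 'UNBOUNDED')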
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()
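    # Illustrative usage sketch (not part of the sqlglot source): _parse_alias
    # above accepts an explicit AS, a bare identifier and, in dialects that set
    # STRING_ALIASES, a string literal; both projections here become exp.Alias:
    #
    #   >>> import sqlglot
    #   >>> [e.alias for e in sqlglot.parse_one("SELECT 1 AS x, 2 y").expressions]
    #   ['x', 'y']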
    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_pipe_syntax_query(self, query: exp.Select) -> exp.Query:
        while self._match(TokenType.PIPE_GT):
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                self.raise_error(f"Unsupported pipe syntax operator: '{self._curr.text.upper()}'.")
            else:
                query = parser(self, query)

        return query

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )
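    # Illustrative usage sketch (not part of the sqlglot source): _parse_csv and
    # _parse_wrapped_csv above are the workhorses behind every comma-separated
    # list in the grammar, e.g. the projection list of a SELECT:
    #
    #   >>> import sqlglot
    #   >>> len(sqlglot.parse_one("SELECT a, b, c FROM t").expressions)
    #   3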
    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._prev.text.upper() == "ADD":
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_column_or_constraint():
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            return self._parse_add_column()

        if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq(
            "COLUMNS"
        ):
            schema = self._parse_schema()

            return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def)

        return self._parse_csv(_parse_add_column_or_constraint)
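    # Illustrative usage sketch (not part of the sqlglot source; names are made
    # up): ADD COLUMN clauses handled by _parse_add_column/_parse_alter_table_add
    # above surface as the `actions` of an exp.Alter node:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    #   >>> isinstance(alter, exp.Alter), len(alter.args["actions"])
    #   (True, 1)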
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
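    # Illustrative usage sketch (not part of the sqlglot source; names are made
    # up): the SET DATA TYPE branch of _parse_alter_table_alter above captures
    # the target type in the `dtype` arg of exp.AlterColumn:
    #
    #   >>> import sqlglot
    #   >>> alter = sqlglot.parse_one(
    #   ...     "ALTER TABLE t ALTER COLUMN c TYPE BIGINT", read="postgres"
    #   ... )
    #   >>> alter.args["actions"][0].args["dtype"].sql()
    #   'BIGINT'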
self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7407 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7408 elif self._match_text_seq("LOCATION"): 7409 alter_set.set("location", self._parse_field()) 7410 elif self._match_text_seq("ACCESS", "METHOD"): 7411 alter_set.set("access_method", self._parse_field()) 7412 elif self._match_text_seq("TABLESPACE"): 7413 alter_set.set("tablespace", self._parse_field()) 7414 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7415 alter_set.set("file_format", [self._parse_field()]) 7416 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7417 alter_set.set("file_format", self._parse_wrapped_options()) 7418 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7419 alter_set.set("copy_options", self._parse_wrapped_options()) 7420 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7421 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7422 else: 7423 if self._match_text_seq("SERDE"): 7424 alter_set.set("serde", self._parse_field()) 7425 7426 properties = self._parse_wrapped(self._parse_properties, optional=True) 7427 alter_set.set("expressions", [properties]) 7428 7429 return alter_set 7430 7431 def _parse_alter(self) -> exp.Alter | exp.Command: 7432 start = self._prev 7433 7434 alter_token = self._match_set(self.ALTERABLES) and self._prev 7435 if not alter_token: 7436 return self._parse_as_command(start) 7437 7438 exists = self._parse_exists() 7439 only = self._match_text_seq("ONLY") 7440 this = self._parse_table(schema=True) 7441 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7442 7443 if self._next: 7444 self._advance() 7445 7446 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7447 if parser: 7448 actions = ensure_list(parser(self)) 7449 not_valid = self._match_text_seq("NOT", "VALID") 7450 options = self._parse_csv(self._parse_property) 7451 7452 if not self._curr and actions: 7453 return self.expression( 7454 exp.Alter, 7455 this=this, 7456 kind=alter_token.text.upper(), 7457 exists=exists, 7458 actions=actions, 7459 only=only, 7460 options=options, 7461 cluster=cluster, 7462 not_valid=not_valid, 7463 ) 7464 7465 return self._parse_as_command(start) 7466 7467 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7468 start = self._prev 7469 # https://duckdb.org/docs/sql/statements/analyze 7470 if not self._curr: 7471 return self.expression(exp.Analyze) 7472 7473 options = [] 7474 while self._match_texts(self.ANALYZE_STYLES): 7475 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7476 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7477 else: 7478 options.append(self._prev.text.upper()) 7479 7480 this: t.Optional[exp.Expression] = None 7481 inner_expression: t.Optional[exp.Expression] = None 7482 7483 kind = self._curr and self._curr.text.upper() 7484 7485 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7486 this = self._parse_table_parts() 7487 elif self._match_text_seq("TABLES"): 7488 if self._match_set((TokenType.FROM, TokenType.IN)): 7489 kind = f"{kind} {self._prev.text.upper()}" 7490 this = self._parse_table(schema=True, is_db_reference=True) 7491 elif self._match_text_seq("DATABASE"): 7492 this = self._parse_table(schema=True, is_db_reference=True) 7493 elif self._match_text_seq("CLUSTER"): 7494 this = self._parse_table() 7495 # Try matching inner expr keywords before fallback to parse table. 
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind  https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None
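    # Illustrative usage sketch (not part of the sqlglot source; exact behavior
    # may vary by dialect and version): ANALYZE statements funnel through
    # _parse_analyze and the helpers above into a single exp.Analyze node:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS", read="spark")
    #   >>> isinstance(ast, exp.Analyze)
    #   True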
    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
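    # Illustrative usage sketch (not part of the sqlglot source; names are made
    # up): _parse_merge above collects the WHEN branches produced by
    # _parse_when_matched below under an exp.Whens node:
    #
    #   >>> import sqlglot
    #   >>> merge = sqlglot.parse_one(
    #   ...     "MERGE INTO t USING s ON t.id = s.id "
    #   ...     "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #   ...     "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   ... )
    #   >>> len(merge.args["whens"].expressions)
    #   2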
    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])
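    # Illustrative usage sketch (not part of the sqlglot source): _parse_set
    # above builds exp.Set from SET items and retreats to the exp.Command
    # fallback if any token is left unconsumed:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("SET x = 1"), exp.Set)
    #   True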
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
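    # Illustrative usage sketch (not part of the sqlglot source): _find_parser
    # above walks tries such as SHOW_TRIE/SET_TRIE to dispatch multi-word
    # keywords, which is how MySQL SHOW statements resolve to exp.Show:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> isinstance(sqlglot.parse_one("SHOW TABLES", read="mysql"), exp.Show)
    #   True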
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
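    # Illustrative usage sketch (not part of the sqlglot source; names are made
    # up): _replace_lambda above keeps lambda parameters from being resolved as
    # columns inside higher-order functions:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one("SELECT FILTER(xs, x -> x > 0) FROM t", read="spark")
    #   >>> ast.find(exp.Lambda) is not None
    #   True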
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
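    # Illustrative usage sketch (not part of the sqlglot source; names are made
    # up): _parse_truncate_table above records Postgres-style identity handling:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> tt = sqlglot.parse_one("TRUNCATE TABLE a, b RESTART IDENTITY", read="postgres")
    #   >>> isinstance(tt, exp.TruncateTable), tt.args["identity"]
    #   (True, 'RESTART')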
    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)
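    # Illustrative usage sketch (not part of the sqlglot source; the stage and
    # table names are made up): a Snowflake COPY INTO statement is parsed by
    # _parse_copy above, with FILE_FORMAT handled by
    # _parse_copy_parameters/_parse_wrapped_options:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> copy = sqlglot.parse_one(
    #   ...     "COPY INTO t FROM @stage FILE_FORMAT = (TYPE = CSV)", read="snowflake"
    #   ... )
    #   >>> isinstance(copy, exp.Copy)
    #   True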
    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression
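    # Illustrative usage sketch (not part of the sqlglot source; names are made
    # up): GRANT statements that parse cleanly become exp.Grant, mirroring
    # _parse_grant above; trailing unparsed tokens fall back to exp.Command:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> grant = sqlglot.parse_one("GRANT SELECT ON TABLE t TO ROLE admin", read="snowflake")
    #   >>> isinstance(grant, exp.Grant)
    #   True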
TokenType.STRAIGHT_JOIN, 731 } 732 733 JOIN_HINTS: t.Set[str] = set() 734 735 LAMBDAS = { 736 TokenType.ARROW: lambda self, expressions: self.expression( 737 exp.Lambda, 738 this=self._replace_lambda( 739 self._parse_assignment(), 740 expressions, 741 ), 742 expressions=expressions, 743 ), 744 TokenType.FARROW: lambda self, expressions: self.expression( 745 exp.Kwarg, 746 this=exp.var(expressions[0].name), 747 expression=self._parse_assignment(), 748 ), 749 } 750 751 COLUMN_OPERATORS = { 752 TokenType.DOT: None, 753 TokenType.DOTCOLON: lambda self, this, to: self.expression( 754 exp.JSONCast, 755 this=this, 756 to=to, 757 ), 758 TokenType.DCOLON: lambda self, this, to: self.expression( 759 exp.Cast if self.STRICT_CAST else exp.TryCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.ARROW: lambda self, this, path: self.expression( 764 exp.JSONExtract, 765 this=this, 766 expression=self.dialect.to_json_path(path), 767 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 768 ), 769 TokenType.DARROW: lambda self, this, path: self.expression( 770 exp.JSONExtractScalar, 771 this=this, 772 expression=self.dialect.to_json_path(path), 773 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 774 ), 775 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 776 exp.JSONBExtract, 777 this=this, 778 expression=path, 779 ), 780 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 781 exp.JSONBExtractScalar, 782 this=this, 783 expression=path, 784 ), 785 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 786 exp.JSONBContains, 787 this=this, 788 expression=key, 789 ), 790 } 791 792 EXPRESSION_PARSERS = { 793 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 794 exp.Column: lambda self: self._parse_column(), 795 exp.Condition: lambda self: self._parse_assignment(), 796 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 797 exp.Expression: lambda self: self._parse_expression(), 798 exp.From: lambda self: self._parse_from(joins=True), 799 exp.Group: lambda self: self._parse_group(), 800 exp.Having: lambda self: self._parse_having(), 801 exp.Hint: lambda self: self._parse_hint_body(), 802 exp.Identifier: lambda self: self._parse_id_var(), 803 exp.Join: lambda self: self._parse_join(), 804 exp.Lambda: lambda self: self._parse_lambda(), 805 exp.Lateral: lambda self: self._parse_lateral(), 806 exp.Limit: lambda self: self._parse_limit(), 807 exp.Offset: lambda self: self._parse_offset(), 808 exp.Order: lambda self: self._parse_order(), 809 exp.Ordered: lambda self: self._parse_ordered(), 810 exp.Properties: lambda self: self._parse_properties(), 811 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 812 exp.Qualify: lambda self: self._parse_qualify(), 813 exp.Returning: lambda self: self._parse_returning(), 814 exp.Select: lambda self: self._parse_select(), 815 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 816 exp.Table: lambda self: self._parse_table_parts(), 817 exp.TableAlias: lambda self: self._parse_table_alias(), 818 exp.Tuple: lambda self: self._parse_value(values=False), 819 exp.Whens: lambda self: self._parse_when_matched(), 820 exp.Where: lambda self: self._parse_where(), 821 exp.Window: lambda self: self._parse_named_window(), 822 exp.With: lambda self: self._parse_with(), 823 "JOIN_TYPE": lambda self: self._parse_join_parts(), 824 } 825 826 STATEMENT_PARSERS = { 827 TokenType.ALTER: lambda self: self._parse_alter(), 828 TokenType.ANALYZE: lambda self: 
self._parse_analyze(), 829 TokenType.BEGIN: lambda self: self._parse_transaction(), 830 TokenType.CACHE: lambda self: self._parse_cache(), 831 TokenType.COMMENT: lambda self: self._parse_comment(), 832 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 833 TokenType.COPY: lambda self: self._parse_copy(), 834 TokenType.CREATE: lambda self: self._parse_create(), 835 TokenType.DELETE: lambda self: self._parse_delete(), 836 TokenType.DESC: lambda self: self._parse_describe(), 837 TokenType.DESCRIBE: lambda self: self._parse_describe(), 838 TokenType.DROP: lambda self: self._parse_drop(), 839 TokenType.GRANT: lambda self: self._parse_grant(), 840 TokenType.INSERT: lambda self: self._parse_insert(), 841 TokenType.KILL: lambda self: self._parse_kill(), 842 TokenType.LOAD: lambda self: self._parse_load(), 843 TokenType.MERGE: lambda self: self._parse_merge(), 844 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 845 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 846 TokenType.REFRESH: lambda self: self._parse_refresh(), 847 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 848 TokenType.SET: lambda self: self._parse_set(), 849 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 850 TokenType.UNCACHE: lambda self: self._parse_uncache(), 851 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 852 TokenType.UPDATE: lambda self: self._parse_update(), 853 TokenType.USE: lambda self: self._parse_use(), 854 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 855 } 856 857 UNARY_PARSERS = { 858 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 859 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 860 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 861 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 862 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 863 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 864 } 865 866 STRING_PARSERS = { 867 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 868 exp.RawString, this=token.text 869 ), 870 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 871 exp.National, this=token.text 872 ), 873 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 874 TokenType.STRING: lambda self, token: self.expression( 875 exp.Literal, this=token.text, is_string=True 876 ), 877 TokenType.UNICODE_STRING: lambda self, token: self.expression( 878 exp.UnicodeString, 879 this=token.text, 880 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 881 ), 882 } 883 884 NUMERIC_PARSERS = { 885 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 886 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 887 TokenType.HEX_STRING: lambda self, token: self.expression( 888 exp.HexString, 889 this=token.text, 890 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 891 ), 892 TokenType.NUMBER: lambda self, token: self.expression( 893 exp.Literal, this=token.text, is_string=False 894 ), 895 } 896 897 PRIMARY_PARSERS = { 898 **STRING_PARSERS, 899 **NUMERIC_PARSERS, 900 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 901 TokenType.NULL: lambda self, _: self.expression(exp.Null), 902 
TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 903 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 904 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 905 TokenType.STAR: lambda self, _: self._parse_star_ops(), 906 } 907 908 PLACEHOLDER_PARSERS = { 909 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 910 TokenType.PARAMETER: lambda self: self._parse_parameter(), 911 TokenType.COLON: lambda self: ( 912 self.expression(exp.Placeholder, this=self._prev.text) 913 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 914 else None 915 ), 916 } 917 918 RANGE_PARSERS = { 919 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 920 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 921 TokenType.GLOB: binary_range_parser(exp.Glob), 922 TokenType.ILIKE: binary_range_parser(exp.ILike), 923 TokenType.IN: lambda self, this: self._parse_in(this), 924 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 925 TokenType.IS: lambda self, this: self._parse_is(this), 926 TokenType.LIKE: binary_range_parser(exp.Like), 927 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 928 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 929 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 930 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 931 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 932 } 933 934 PIPE_SYNTAX_TRANSFORM_PARSERS = { 935 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 936 "WHERE": lambda self, query: self._parse_pipe_syntax_where(query), 937 "ORDER BY": lambda self, query: query.order_by(self._parse_order(), copy=False), 938 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 939 "OFFSET": lambda self, query: query.offset(self._parse_offset(), copy=False), 940 } 941 942 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 943 "ALLOWED_VALUES": lambda self: self.expression( 944 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 945 ), 946 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 947 "AUTO": lambda self: self._parse_auto_property(), 948 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 949 "BACKUP": lambda self: self.expression( 950 exp.BackupProperty, this=self._parse_var(any_token=True) 951 ), 952 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 953 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 954 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 955 "CHECKSUM": lambda self: self._parse_checksum(), 956 "CLUSTER BY": lambda self: self._parse_cluster(), 957 "CLUSTERED": lambda self: self._parse_clustered_by(), 958 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 959 exp.CollateProperty, **kwargs 960 ), 961 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 962 "CONTAINS": lambda self: self._parse_contains_property(), 963 "COPY": lambda self: self._parse_copy_property(), 964 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 965 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 966 "DEFINER": lambda self: self._parse_definer(), 967 "DETERMINISTIC": lambda self: self.expression( 968 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 969 ), 970 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 971 
"DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 972 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 973 "DISTKEY": lambda self: self._parse_distkey(), 974 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 975 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 976 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 977 "ENVIRONMENT": lambda self: self.expression( 978 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 979 ), 980 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 981 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 982 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 983 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 984 "FREESPACE": lambda self: self._parse_freespace(), 985 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 986 "HEAP": lambda self: self.expression(exp.HeapProperty), 987 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 988 "IMMUTABLE": lambda self: self.expression( 989 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 990 ), 991 "INHERITS": lambda self: self.expression( 992 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 993 ), 994 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 995 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 996 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 997 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 998 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 999 "LIKE": lambda self: self._parse_create_like(), 1000 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1001 "LOCK": lambda self: self._parse_locking(), 1002 "LOCKING": lambda self: self._parse_locking(), 1003 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1004 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1005 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1006 "MODIFIES": lambda self: self._parse_modifies_property(), 1007 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1008 "NO": lambda self: self._parse_no_property(), 1009 "ON": lambda self: self._parse_on_property(), 1010 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1011 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1012 "PARTITION": lambda self: self._parse_partitioned_of(), 1013 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1014 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1015 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1016 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1017 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1018 "READS": lambda self: self._parse_reads_property(), 1019 "REMOTE": lambda self: self._parse_remote_with_connection(), 1020 "RETURNS": lambda self: self._parse_returns(), 1021 "STRICT": lambda self: self.expression(exp.StrictProperty), 1022 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1023 "ROW": lambda self: self._parse_row(), 1024 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1025 "SAMPLE": lambda self: self.expression( 1026 exp.SampleProperty, 
this=self._match_text_seq("BY") and self._parse_bitwise() 1027 ), 1028 "SECURE": lambda self: self.expression(exp.SecureProperty), 1029 "SECURITY": lambda self: self._parse_security(), 1030 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1031 "SETTINGS": lambda self: self._parse_settings_property(), 1032 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1033 "SORTKEY": lambda self: self._parse_sortkey(), 1034 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1035 "STABLE": lambda self: self.expression( 1036 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1037 ), 1038 "STORED": lambda self: self._parse_stored(), 1039 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1040 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1041 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1042 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1043 "TO": lambda self: self._parse_to_table(), 1044 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1045 "TRANSFORM": lambda self: self.expression( 1046 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1047 ), 1048 "TTL": lambda self: self._parse_ttl(), 1049 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1050 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1051 "VOLATILE": lambda self: self._parse_volatile_property(), 1052 "WITH": lambda self: self._parse_with_property(), 1053 } 1054 1055 CONSTRAINT_PARSERS = { 1056 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1057 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1058 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1059 "CHARACTER SET": lambda self: self.expression( 1060 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1061 ), 1062 "CHECK": lambda self: self.expression( 1063 exp.CheckColumnConstraint, 1064 this=self._parse_wrapped(self._parse_assignment), 1065 enforced=self._match_text_seq("ENFORCED"), 1066 ), 1067 "COLLATE": lambda self: self.expression( 1068 exp.CollateColumnConstraint, 1069 this=self._parse_identifier() or self._parse_column(), 1070 ), 1071 "COMMENT": lambda self: self.expression( 1072 exp.CommentColumnConstraint, this=self._parse_string() 1073 ), 1074 "COMPRESS": lambda self: self._parse_compress(), 1075 "CLUSTERED": lambda self: self.expression( 1076 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1077 ), 1078 "NONCLUSTERED": lambda self: self.expression( 1079 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1080 ), 1081 "DEFAULT": lambda self: self.expression( 1082 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1083 ), 1084 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1085 "EPHEMERAL": lambda self: self.expression( 1086 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1087 ), 1088 "EXCLUDE": lambda self: self.expression( 1089 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1090 ), 1091 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1092 "FORMAT": lambda self: self.expression( 1093 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1094 ), 1095 "GENERATED": lambda self: self._parse_generated_as_identity(), 1096 "IDENTITY": lambda self: self._parse_auto_increment(), 1097 "INLINE": lambda self: 
self._parse_inline(), 1098 "LIKE": lambda self: self._parse_create_like(), 1099 "NOT": lambda self: self._parse_not_constraint(), 1100 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1101 "ON": lambda self: ( 1102 self._match(TokenType.UPDATE) 1103 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1104 ) 1105 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1106 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1107 "PERIOD": lambda self: self._parse_period_for_system_time(), 1108 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1109 "REFERENCES": lambda self: self._parse_references(match=False), 1110 "TITLE": lambda self: self.expression( 1111 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1112 ), 1113 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1114 "UNIQUE": lambda self: self._parse_unique(), 1115 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1116 "WATERMARK": lambda self: self.expression( 1117 exp.WatermarkColumnConstraint, 1118 this=self._match(TokenType.FOR) and self._parse_column(), 1119 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1120 ), 1121 "WITH": lambda self: self.expression( 1122 exp.Properties, expressions=self._parse_wrapped_properties() 1123 ), 1124 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1125 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1126 } 1127 1128 def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query: 1129 select = self._parse_select() 1130 if isinstance(select, exp.Select): 1131 return select.from_(query.subquery(copy=False), copy=False) 1132 return query 1133 1134 def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query: 1135 where = self._parse_where() 1136 return query.where(where, copy=False) 1137 1138 def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query: 1139 limit = self._parse_limit() 1140 offset = self._parse_offset() 1141 if limit: 1142 query.limit(limit, copy=False) 1143 if offset: 1144 query.offset(offset, copy=False) 1145 return query 1146 1147 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1148 klass = ( 1149 exp.PartitionedByBucket 1150 if self._prev.text.upper() == "BUCKET" 1151 else exp.PartitionByTruncate 1152 ) 1153 1154 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1155 this, expression = seq_get(args, 0), seq_get(args, 1) 1156 1157 if isinstance(this, exp.Literal): 1158 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1159 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1160 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1161 # Both variants are canonicalized into the latter form, i.e. `bucket(<col name>, <num buckets>)` 1162 # 1163 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1164 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1165 this, expression = expression, this 1166 1167 return self.expression(klass, this=this, expression=expression) 1168 1169 ALTER_PARSERS = { 1170 "ADD": lambda self: self._parse_alter_table_add(), 1171 "AS": lambda self:
self._parse_select(), 1172 "ALTER": lambda self: self._parse_alter_table_alter(), 1173 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1174 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1175 "DROP": lambda self: self._parse_alter_table_drop(), 1176 "RENAME": lambda self: self._parse_alter_table_rename(), 1177 "SET": lambda self: self._parse_alter_table_set(), 1178 "SWAP": lambda self: self.expression( 1179 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1180 ), 1181 } 1182 1183 ALTER_ALTER_PARSERS = { 1184 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1185 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1186 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1187 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1188 } 1189 1190 SCHEMA_UNNAMED_CONSTRAINTS = { 1191 "CHECK", 1192 "EXCLUDE", 1193 "FOREIGN KEY", 1194 "LIKE", 1195 "PERIOD", 1196 "PRIMARY KEY", 1197 "UNIQUE", 1198 "WATERMARK", 1199 "BUCKET", 1200 "TRUNCATE", 1201 } 1202 1203 NO_PAREN_FUNCTION_PARSERS = { 1204 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1205 "CASE": lambda self: self._parse_case(), 1206 "CONNECT_BY_ROOT": lambda self: self.expression( 1207 exp.ConnectByRoot, this=self._parse_column() 1208 ), 1209 "IF": lambda self: self._parse_if(), 1210 } 1211 1212 INVALID_FUNC_NAME_TOKENS = { 1213 TokenType.IDENTIFIER, 1214 TokenType.STRING, 1215 } 1216 1217 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1218 1219 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1220 1221 FUNCTION_PARSERS = { 1222 **{ 1223 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1224 }, 1225 **{ 1226 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1227 }, 1228 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1229 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1230 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1231 "DECODE": lambda self: self._parse_decode(), 1232 "EXTRACT": lambda self: self._parse_extract(), 1233 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1234 "GAP_FILL": lambda self: self._parse_gap_fill(), 1235 "JSON_OBJECT": lambda self: self._parse_json_object(), 1236 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1237 "JSON_TABLE": lambda self: self._parse_json_table(), 1238 "MATCH": lambda self: self._parse_match_against(), 1239 "NORMALIZE": lambda self: self._parse_normalize(), 1240 "OPENJSON": lambda self: self._parse_open_json(), 1241 "OVERLAY": lambda self: self._parse_overlay(), 1242 "POSITION": lambda self: self._parse_position(), 1243 "PREDICT": lambda self: self._parse_predict(), 1244 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1245 "STRING_AGG": lambda self: self._parse_string_agg(), 1246 "SUBSTRING": lambda self: self._parse_substring(), 1247 "TRIM": lambda self: self._parse_trim(), 1248 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1249 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1250 "XMLELEMENT": lambda self: self.expression( 1251 exp.XMLElement, 1252 this=self._match_text_seq("NAME") and self._parse_id_var(), 1253 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1254 ), 1255 "XMLTABLE": lambda self: self._parse_xml_table(), 1256 } 1257 1258 QUERY_MODIFIER_PARSERS = { 1259 TokenType.MATCH_RECOGNIZE: lambda self: ("match", 
self._parse_match_recognize()), 1260 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1261 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1262 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1263 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1264 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1265 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1266 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1267 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1268 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1269 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1270 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1271 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1272 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1273 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1274 TokenType.CLUSTER_BY: lambda self: ( 1275 "cluster", 1276 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1277 ), 1278 TokenType.DISTRIBUTE_BY: lambda self: ( 1279 "distribute", 1280 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1281 ), 1282 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1283 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1284 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1285 } 1286 1287 SET_PARSERS = { 1288 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1289 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1290 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1291 "TRANSACTION": lambda self: self._parse_set_transaction(), 1292 } 1293 1294 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1295 1296 TYPE_LITERAL_PARSERS = { 1297 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1298 } 1299 1300 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1301 1302 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1303 1304 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1305 1306 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1307 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1308 "ISOLATION": ( 1309 ("LEVEL", "REPEATABLE", "READ"), 1310 ("LEVEL", "READ", "COMMITTED"), 1311 ("LEVEL", "READ", "UNCOMITTED"), 1312 ("LEVEL", "SERIALIZABLE"), 1313 ), 1314 "READ": ("WRITE", "ONLY"), 1315 } 1316 1317 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1318 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1319 ) 1320 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1321 1322 CREATE_SEQUENCE: OPTIONS_TYPE = { 1323 "SCALE": ("EXTEND", "NOEXTEND"), 1324 "SHARD": ("EXTEND", "NOEXTEND"), 1325 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1326 **dict.fromkeys( 1327 ( 1328 "SESSION", 1329 "GLOBAL", 1330 "KEEP", 1331 "NOKEEP", 1332 "ORDER", 1333 "NOORDER", 1334 "NOCACHE", 1335 "CYCLE", 1336 "NOCYCLE", 1337 "NOMINVALUE", 1338 "NOMAXVALUE", 1339 "NOSCALE", 1340 "NOSHARD", 1341 ), 1342 tuple(), 1343 ), 1344 } 1345 1346 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1347 1348 USABLES: OPTIONS_TYPE = dict.fromkeys( 1349 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1350 ) 1351 1352 
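# Editor's note (illustrative, not part of the original source): the OPTIONS_TYPE tables in this class map a leading keyword to the alternatives allowed to follow it; each alternative is either a single word or a whole word sequence, and an empty tuple() marks a standalone keyword. They are consumed by _parse_var_from_options (see _parse_sequence_properties further below). A rough sketch using only entries defined in this class:
#
#   TRANSACTION_CHARACTERISTICS["ISOLATION"]  # ("LEVEL", "SERIALIZABLE") is one alternative,
#                                             # so "ISOLATION LEVEL SERIALIZABLE" matches
#   TRANSACTION_CHARACTERISTICS["READ"]       # ("WRITE", "ONLY") lists two single-word
#                                             # alternatives: "READ WRITE" or "READ ONLY"
#   CONFLICT_ACTIONS["DO"]                    # ("NOTHING", "UPDATE"): "DO NOTHING" / "DO UPDATE"
#   CONFLICT_ACTIONS["ABORT"]                 # tuple(): bare "ABORT", nothing may follow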
CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1353 1354 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1355 "TYPE": ("EVOLUTION",), 1356 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1357 } 1358 1359 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1360 1361 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1362 1363 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1364 "NOT": ("ENFORCED",), 1365 "MATCH": ( 1366 "FULL", 1367 "PARTIAL", 1368 "SIMPLE", 1369 ), 1370 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1371 "USING": ( 1372 "BTREE", 1373 "HASH", 1374 ), 1375 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1376 } 1377 1378 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1379 "NO": ("OTHERS",), 1380 "CURRENT": ("ROW",), 1381 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1382 } 1383 1384 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1385 1386 CLONE_KEYWORDS = {"CLONE", "COPY"} 1387 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1388 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1389 1390 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1391 1392 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1393 1394 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1395 1396 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1397 1398 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1399 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1400 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1401 1402 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1403 1404 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1405 1406 ADD_CONSTRAINT_TOKENS = { 1407 TokenType.CONSTRAINT, 1408 TokenType.FOREIGN_KEY, 1409 TokenType.INDEX, 1410 TokenType.KEY, 1411 TokenType.PRIMARY_KEY, 1412 TokenType.UNIQUE, 1413 } 1414 1415 DISTINCT_TOKENS = {TokenType.DISTINCT} 1416 1417 NULL_TOKENS = {TokenType.NULL} 1418 1419 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1420 1421 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1422 1423 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1424 1425 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1426 1427 ODBC_DATETIME_LITERALS = { 1428 "d": exp.Date, 1429 "t": exp.Time, 1430 "ts": exp.Timestamp, 1431 } 1432 1433 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1434 1435 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1436 1437 # The style options for the DESCRIBE statement 1438 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1439 1440 # The style options for the ANALYZE statement 1441 ANALYZE_STYLES = { 1442 "BUFFER_USAGE_LIMIT", 1443 "FULL", 1444 "LOCAL", 1445 "NO_WRITE_TO_BINLOG", 1446 "SAMPLE", 1447 "SKIP_LOCKED", 1448 "VERBOSE", 1449 } 1450 1451 ANALYZE_EXPRESSION_PARSERS = { 1452 "ALL": lambda self: self._parse_analyze_columns(), 1453 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1454 "DELETE": lambda self: self._parse_analyze_delete(), 1455 "DROP": lambda self: self._parse_analyze_histogram(), 1456 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1457 "LIST": lambda self: self._parse_analyze_list(), 1458 "PREDICATE": lambda self: self._parse_analyze_columns(), 1459 "UPDATE": lambda self: self._parse_analyze_histogram(), 1460 "VALIDATE": lambda self: 
self._parse_analyze_validate(), 1461 } 1462 1463 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1464 1465 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1466 1467 OPERATION_MODIFIERS: t.Set[str] = set() 1468 1469 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1470 1471 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1472 1473 STRICT_CAST = True 1474 1475 PREFIXED_PIVOT_COLUMNS = False 1476 IDENTIFY_PIVOT_STRINGS = False 1477 1478 LOG_DEFAULTS_TO_LN = False 1479 1480 # Whether the table sample clause expects CSV syntax 1481 TABLESAMPLE_CSV = False 1482 1483 # The default method used for table sampling 1484 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1485 1486 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1487 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1488 1489 # Whether the TRIM function expects the characters to trim as its first argument 1490 TRIM_PATTERN_FIRST = False 1491 1492 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1493 STRING_ALIASES = False 1494 1495 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1496 MODIFIERS_ATTACHED_TO_SET_OP = True 1497 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1498 1499 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1500 NO_PAREN_IF_COMMANDS = True 1501 1502 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1503 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1504 1505 # Whether the `:` operator is used to extract a value from a VARIANT column 1506 COLON_IS_VARIANT_EXTRACT = False 1507 1508 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1509 # If this is True and '(' is not found, the keyword will be treated as an identifier 1510 VALUES_FOLLOWED_BY_PAREN = True 1511 1512 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1513 SUPPORTS_IMPLICIT_UNNEST = False 1514 1515 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1516 INTERVAL_SPANS = True 1517 1518 # Whether a PARTITION clause can follow a table reference 1519 SUPPORTS_PARTITION_SELECTION = False 1520 1521 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1522 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1523 1524 # Whether the 'AS' keyword is optional in the CTE definition syntax 1525 OPTIONAL_ALIAS_TOKEN_CTE = True 1526 1527 __slots__ = ( 1528 "error_level", 1529 "error_message_context", 1530 "max_errors", 1531 "dialect", 1532 "sql", 1533 "errors", 1534 "_tokens", 1535 "_index", 1536 "_curr", 1537 "_next", 1538 "_prev", 1539 "_prev_comments", 1540 ) 1541 1542 # Autofilled 1543 SHOW_TRIE: t.Dict = {} 1544 SET_TRIE: t.Dict = {} 1545 1546 def __init__( 1547 self, 1548 error_level: t.Optional[ErrorLevel] = None, 1549 error_message_context: int = 100, 1550 max_errors: int = 3, 1551 dialect: DialectType = None, 1552 ): 1553 from sqlglot.dialects import Dialect 1554 1555 self.error_level = error_level or ErrorLevel.IMMEDIATE 1556 self.error_message_context = error_message_context 1557 self.max_errors = max_errors 1558 self.dialect = Dialect.get_or_raise(dialect) 1559 self.reset() 1560 1561 def reset(self): 1562 self.sql = "" 1563 self.errors = [] 1564 self._tokens = [] 1565 self._index = 0 1566 self._curr = None 1567 self._next = None 1568 self._prev = None 1569 self._prev_comments = None 1570 1571 def parse( 1572 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1573 ) -> t.List[t.Optional[exp.Expression]]: 1574 """ 1575 Parses a list of tokens and returns a list of syntax trees, one tree 1576 per parsed SQL statement. 1577 1578 Args: 1579 raw_tokens: The list of tokens. 1580 sql: The original SQL string, used to produce helpful debug messages. 1581 1582 Returns: 1583 The list of the produced syntax trees. 1584 """ 1585 return self._parse( 1586 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1587 ) 1588 1589 def parse_into( 1590 self, 1591 expression_types: exp.IntoType, 1592 raw_tokens: t.List[Token], 1593 sql: t.Optional[str] = None, 1594 ) -> t.List[t.Optional[exp.Expression]]: 1595 """ 1596 Parses a list of tokens into a given Expression type. If a collection of Expression 1597 types is given instead, this method will try to parse the token list into each one 1598 of them, stopping at the first for which the parsing succeeds. 1599 1600 Args: 1601 expression_types: The expression type(s) to try and parse the token list into. 1602 raw_tokens: The list of tokens. 1603 sql: The original SQL string, used to produce helpful debug messages. 1604 1605 Returns: 1606 The target Expression. 
1607 """ 1608 errors = [] 1609 for expression_type in ensure_list(expression_types): 1610 parser = self.EXPRESSION_PARSERS.get(expression_type) 1611 if not parser: 1612 raise TypeError(f"No parser registered for {expression_type}") 1613 1614 try: 1615 return self._parse(parser, raw_tokens, sql) 1616 except ParseError as e: 1617 e.errors[0]["into_expression"] = expression_type 1618 errors.append(e) 1619 1620 raise ParseError( 1621 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1622 errors=merge_errors(errors), 1623 ) from errors[-1] 1624 1625 def _parse( 1626 self, 1627 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1628 raw_tokens: t.List[Token], 1629 sql: t.Optional[str] = None, 1630 ) -> t.List[t.Optional[exp.Expression]]: 1631 self.reset() 1632 self.sql = sql or "" 1633 1634 total = len(raw_tokens) 1635 chunks: t.List[t.List[Token]] = [[]] 1636 1637 for i, token in enumerate(raw_tokens): 1638 if token.token_type == TokenType.SEMICOLON: 1639 if token.comments: 1640 chunks.append([token]) 1641 1642 if i < total - 1: 1643 chunks.append([]) 1644 else: 1645 chunks[-1].append(token) 1646 1647 expressions = [] 1648 1649 for tokens in chunks: 1650 self._index = -1 1651 self._tokens = tokens 1652 self._advance() 1653 1654 expressions.append(parse_method(self)) 1655 1656 if self._index < len(self._tokens): 1657 self.raise_error("Invalid expression / Unexpected token") 1658 1659 self.check_errors() 1660 1661 return expressions 1662 1663 def check_errors(self) -> None: 1664 """Logs or raises any found errors, depending on the chosen error level setting.""" 1665 if self.error_level == ErrorLevel.WARN: 1666 for error in self.errors: 1667 logger.error(str(error)) 1668 elif self.error_level == ErrorLevel.RAISE and self.errors: 1669 raise ParseError( 1670 concat_messages(self.errors, self.max_errors), 1671 errors=merge_errors(self.errors), 1672 ) 1673 1674 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1675 """ 1676 Appends an error in the list of recorded errors or raises it, depending on the chosen 1677 error level setting. 1678 """ 1679 token = token or self._curr or self._prev or Token.string("") 1680 start = token.start 1681 end = token.end + 1 1682 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1683 highlight = self.sql[start:end] 1684 end_context = self.sql[end : end + self.error_message_context] 1685 1686 error = ParseError.new( 1687 f"{message}. Line {token.line}, Col: {token.col}.\n" 1688 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1689 description=message, 1690 line=token.line, 1691 col=token.col, 1692 start_context=start_context, 1693 highlight=highlight, 1694 end_context=end_context, 1695 ) 1696 1697 if self.error_level == ErrorLevel.IMMEDIATE: 1698 raise error 1699 1700 self.errors.append(error) 1701 1702 def expression( 1703 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1704 ) -> E: 1705 """ 1706 Creates a new, validated Expression. 1707 1708 Args: 1709 exp_class: The expression class to instantiate. 1710 comments: An optional list of comments to attach to the expression. 1711 kwargs: The arguments to set for the expression along with their respective values. 1712 1713 Returns: 1714 The target expression. 
1715 """ 1716 instance = exp_class(**kwargs) 1717 instance.add_comments(comments) if comments else self._add_comments(instance) 1718 return self.validate_expression(instance) 1719 1720 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1721 if expression and self._prev_comments: 1722 expression.add_comments(self._prev_comments) 1723 self._prev_comments = None 1724 1725 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1726 """ 1727 Validates an Expression, making sure that all its mandatory arguments are set. 1728 1729 Args: 1730 expression: The expression to validate. 1731 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1732 1733 Returns: 1734 The validated expression. 1735 """ 1736 if self.error_level != ErrorLevel.IGNORE: 1737 for error_message in expression.error_messages(args): 1738 self.raise_error(error_message) 1739 1740 return expression 1741 1742 def _find_sql(self, start: Token, end: Token) -> str: 1743 return self.sql[start.start : end.end + 1] 1744 1745 def _is_connected(self) -> bool: 1746 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1747 1748 def _advance(self, times: int = 1) -> None: 1749 self._index += times 1750 self._curr = seq_get(self._tokens, self._index) 1751 self._next = seq_get(self._tokens, self._index + 1) 1752 1753 if self._index > 0: 1754 self._prev = self._tokens[self._index - 1] 1755 self._prev_comments = self._prev.comments 1756 else: 1757 self._prev = None 1758 self._prev_comments = None 1759 1760 def _retreat(self, index: int) -> None: 1761 if index != self._index: 1762 self._advance(index - self._index) 1763 1764 def _warn_unsupported(self) -> None: 1765 if len(self._tokens) <= 1: 1766 return 1767 1768 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1769 # interested in emitting a warning for the one being currently processed. 1770 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1771 1772 logger.warning( 1773 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1774 ) 1775 1776 def _parse_command(self) -> exp.Command: 1777 self._warn_unsupported() 1778 return self.expression( 1779 exp.Command, 1780 comments=self._prev_comments, 1781 this=self._prev.text.upper(), 1782 expression=self._parse_string(), 1783 ) 1784 1785 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1786 """ 1787 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1788 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1789 solve this by setting & resetting the parser state accordingly 1790 """ 1791 index = self._index 1792 error_level = self.error_level 1793 1794 self.error_level = ErrorLevel.IMMEDIATE 1795 try: 1796 this = parse_method() 1797 except ParseError: 1798 this = None 1799 finally: 1800 if not this or retreat: 1801 self._retreat(index) 1802 self.error_level = error_level 1803 1804 return this 1805 1806 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1807 start = self._prev 1808 exists = self._parse_exists() if allow_exists else None 1809 1810 self._match(TokenType.ON) 1811 1812 materialized = self._match_text_seq("MATERIALIZED") 1813 kind = self._match_set(self.CREATABLES) and self._prev 1814 if not kind: 1815 return self._parse_as_command(start) 1816 1817 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1818 this = self._parse_user_defined_function(kind=kind.token_type) 1819 elif kind.token_type == TokenType.TABLE: 1820 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1821 elif kind.token_type == TokenType.COLUMN: 1822 this = self._parse_column() 1823 else: 1824 this = self._parse_id_var() 1825 1826 self._match(TokenType.IS) 1827 1828 return self.expression( 1829 exp.Comment, 1830 this=this, 1831 kind=kind.text, 1832 expression=self._parse_string(), 1833 exists=exists, 1834 materialized=materialized, 1835 ) 1836 1837 def _parse_to_table( 1838 self, 1839 ) -> exp.ToTableProperty: 1840 table = self._parse_table_parts(schema=True) 1841 return self.expression(exp.ToTableProperty, this=table) 1842 1843 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1844 def _parse_ttl(self) -> exp.Expression: 1845 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1846 this = self._parse_bitwise() 1847 1848 if self._match_text_seq("DELETE"): 1849 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1850 if self._match_text_seq("RECOMPRESS"): 1851 return self.expression( 1852 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1853 ) 1854 if self._match_text_seq("TO", "DISK"): 1855 return self.expression( 1856 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1857 ) 1858 if self._match_text_seq("TO", "VOLUME"): 1859 return self.expression( 1860 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1861 ) 1862 1863 return this 1864 1865 expressions = self._parse_csv(_parse_ttl_action) 1866 where = self._parse_where() 1867 group = self._parse_group() 1868 1869 aggregates = None 1870 if group and self._match(TokenType.SET): 1871 aggregates = self._parse_csv(self._parse_set_item) 1872 1873 return self.expression( 1874 exp.MergeTreeTTL, 1875 expressions=expressions, 1876 where=where, 1877 group=group, 1878 aggregates=aggregates, 1879 ) 1880 1881 def _parse_statement(self) -> t.Optional[exp.Expression]: 1882 if self._curr is None: 1883 return None 1884 1885 if self._match_set(self.STATEMENT_PARSERS): 1886 comments = self._prev_comments 1887 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1888 stmt.add_comments(comments, prepend=True) 1889 return stmt 1890 1891 if self._match_set(self.dialect.tokenizer.COMMANDS): 1892 return self._parse_command() 1893 1894 expression = self._parse_expression() 1895 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1896 return self._parse_query_modifiers(expression) 
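# Editor's note (illustrative, not part of the original source): _parse_statement above is the per-statement entry point reached through parse(). The first token drives dispatch: STATEMENT_PARSERS entries win, dialect-level commands are tried next, and anything else falls through to the expression/SELECT path. A minimal usage sketch, assuming the default dialect:
#
#   from sqlglot.dialects import Dialect
#
#   dialect = Dialect.get_or_raise(None)
#   sql = "DROP TABLE t; SELECT 1"
#   trees = dialect.parser().parse(dialect.tokenize(sql), sql)
#   # DROP dispatches via STATEMENT_PARSERS[TokenType.DROP] to _parse_drop below, while
#   # SELECT has no STATEMENT_PARSERS entry and is parsed through the expression/SELECT path.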
1897 1898 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1899 start = self._prev 1900 temporary = self._match(TokenType.TEMPORARY) 1901 materialized = self._match_text_seq("MATERIALIZED") 1902 1903 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1904 if not kind: 1905 return self._parse_as_command(start) 1906 1907 concurrently = self._match_text_seq("CONCURRENTLY") 1908 if_exists = exists or self._parse_exists() 1909 1910 if kind == "COLUMN": 1911 this = self._parse_column() 1912 else: 1913 this = self._parse_table_parts( 1914 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1915 ) 1916 1917 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1918 1919 if self._match(TokenType.L_PAREN, advance=False): 1920 expressions = self._parse_wrapped_csv(self._parse_types) 1921 else: 1922 expressions = None 1923 1924 return self.expression( 1925 exp.Drop, 1926 exists=if_exists, 1927 this=this, 1928 expressions=expressions, 1929 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1930 temporary=temporary, 1931 materialized=materialized, 1932 cascade=self._match_text_seq("CASCADE"), 1933 constraints=self._match_text_seq("CONSTRAINTS"), 1934 purge=self._match_text_seq("PURGE"), 1935 cluster=cluster, 1936 concurrently=concurrently, 1937 ) 1938 1939 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1940 return ( 1941 self._match_text_seq("IF") 1942 and (not not_ or self._match(TokenType.NOT)) 1943 and self._match(TokenType.EXISTS) 1944 ) 1945 1946 def _parse_create(self) -> exp.Create | exp.Command: 1947 # Note: this can't be None because we've matched a statement parser 1948 start = self._prev 1949 1950 replace = ( 1951 start.token_type == TokenType.REPLACE 1952 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1953 or self._match_pair(TokenType.OR, TokenType.ALTER) 1954 ) 1955 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1956 1957 unique = self._match(TokenType.UNIQUE) 1958 1959 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1960 clustered = True 1961 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1962 "COLUMNSTORE" 1963 ): 1964 clustered = False 1965 else: 1966 clustered = None 1967 1968 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1969 self._advance() 1970 1971 properties = None 1972 create_token = self._match_set(self.CREATABLES) and self._prev 1973 1974 if not create_token: 1975 # exp.Properties.Location.POST_CREATE 1976 properties = self._parse_properties() 1977 create_token = self._match_set(self.CREATABLES) and self._prev 1978 1979 if not properties or not create_token: 1980 return self._parse_as_command(start) 1981 1982 concurrently = self._match_text_seq("CONCURRENTLY") 1983 exists = self._parse_exists(not_=True) 1984 this = None 1985 expression: t.Optional[exp.Expression] = None 1986 indexes = None 1987 no_schema_binding = None 1988 begin = None 1989 end = None 1990 clone = None 1991 1992 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1993 nonlocal properties 1994 if properties and temp_props: 1995 properties.expressions.extend(temp_props.expressions) 1996 elif temp_props: 1997 properties = temp_props 1998 1999 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2000 this = self._parse_user_defined_function(kind=create_token.token_type) 2001 2002 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2003 
extend_props(self._parse_properties()) 2004 2005 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2006 extend_props(self._parse_properties()) 2007 2008 if not expression: 2009 if self._match(TokenType.COMMAND): 2010 expression = self._parse_as_command(self._prev) 2011 else: 2012 begin = self._match(TokenType.BEGIN) 2013 return_ = self._match_text_seq("RETURN") 2014 2015 if self._match(TokenType.STRING, advance=False): 2016 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2017 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2018 expression = self._parse_string() 2019 extend_props(self._parse_properties()) 2020 else: 2021 expression = self._parse_user_defined_function_expression() 2022 2023 end = self._match_text_seq("END") 2024 2025 if return_: 2026 expression = self.expression(exp.Return, this=expression) 2027 elif create_token.token_type == TokenType.INDEX: 2028 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2029 if not self._match(TokenType.ON): 2030 index = self._parse_id_var() 2031 anonymous = False 2032 else: 2033 index = None 2034 anonymous = True 2035 2036 this = self._parse_index(index=index, anonymous=anonymous) 2037 elif create_token.token_type in self.DB_CREATABLES: 2038 table_parts = self._parse_table_parts( 2039 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2040 ) 2041 2042 # exp.Properties.Location.POST_NAME 2043 self._match(TokenType.COMMA) 2044 extend_props(self._parse_properties(before=True)) 2045 2046 this = self._parse_schema(this=table_parts) 2047 2048 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2049 extend_props(self._parse_properties()) 2050 2051 has_alias = self._match(TokenType.ALIAS) 2052 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2053 # exp.Properties.Location.POST_ALIAS 2054 extend_props(self._parse_properties()) 2055 2056 if create_token.token_type == TokenType.SEQUENCE: 2057 expression = self._parse_types() 2058 extend_props(self._parse_properties()) 2059 else: 2060 expression = self._parse_ddl_select() 2061 2062 # Some dialects also support using a table as an alias instead of a SELECT. 2063 # Here we fall back to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
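    # Illustrative sketch (assumption, not part of the module): a CREATE ... AS SELECT
    # round-trips through _parse_create into an exp.Create whose "expression" arg holds
    # the DDL SELECT parsed by _parse_ddl_select above.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> create = sqlglot.parse_one("CREATE OR REPLACE TABLE t AS SELECT 1 AS a")
    # >>> create.args["kind"], create.args["replace"], isinstance(create.expression, exp.Select)
    # ('TABLE', True, True)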
self._match_texts(("MIN", "MINIMUM")), 2167 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2168 } 2169 2170 if self._match_texts(self.PROPERTY_PARSERS): 2171 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2172 try: 2173 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2174 except TypeError: 2175 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2176 2177 return None 2178 2179 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2180 return self._parse_wrapped_csv(self._parse_property) 2181 2182 def _parse_property(self) -> t.Optional[exp.Expression]: 2183 if self._match_texts(self.PROPERTY_PARSERS): 2184 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2185 2186 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2187 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2188 2189 if self._match_text_seq("COMPOUND", "SORTKEY"): 2190 return self._parse_sortkey(compound=True) 2191 2192 if self._match_text_seq("SQL", "SECURITY"): 2193 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2194 2195 index = self._index 2196 key = self._parse_column() 2197 2198 if not self._match(TokenType.EQ): 2199 self._retreat(index) 2200 return self._parse_sequence_properties() 2201 2202 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2203 if isinstance(key, exp.Column): 2204 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2205 2206 value = self._parse_bitwise() or self._parse_var(any_token=True) 2207 2208 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2209 if isinstance(value, exp.Column): 2210 value = exp.var(value.name) 2211 2212 return self.expression(exp.Property, this=key, value=value) 2213 2214 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2215 if self._match_text_seq("BY"): 2216 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2217 2218 self._match(TokenType.ALIAS) 2219 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2220 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2221 2222 return self.expression( 2223 exp.FileFormatProperty, 2224 this=( 2225 self.expression( 2226 exp.InputOutputFormat, 2227 input_format=input_format, 2228 output_format=output_format, 2229 ) 2230 if input_format or output_format 2231 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2232 ), 2233 ) 2234 2235 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2236 field = self._parse_field() 2237 if isinstance(field, exp.Identifier) and not field.quoted: 2238 field = exp.var(field) 2239 2240 return field 2241 2242 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2243 self._match(TokenType.EQ) 2244 self._match(TokenType.ALIAS) 2245 2246 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2247 2248 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2249 properties = [] 2250 while True: 2251 if before: 2252 prop = self._parse_property_before() 2253 else: 2254 prop = self._parse_property() 2255 if not prop: 2256 break 2257 for p in ensure_list(prop): 2258 properties.append(p) 2259 2260 if properties: 2261 return self.expression(exp.Properties, expressions=properties) 2262 2263 return None 2264 2265 
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
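    # Illustrative sketch (assumption): MySQL's DEFINER clause, handled by
    # _parse_definer above, should end up as a DefinerProperty on the CREATE VIEW.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> create = sqlglot.parse_one(
    # ...     "CREATE DEFINER=admin@localhost VIEW v AS SELECT 1", read="mysql"
    # ... )
    # >>> create.find(exp.DefinerProperty).this
    # 'admin@localhost'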
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None
self._match_text_seq("DATABASE"): 2579 kind = "DATABASE" 2580 else: 2581 kind = None 2582 2583 if kind in ("DATABASE", "TABLE", "VIEW"): 2584 this = self._parse_table_parts() 2585 else: 2586 this = None 2587 2588 if self._match(TokenType.FOR): 2589 for_or_in = "FOR" 2590 elif self._match(TokenType.IN): 2591 for_or_in = "IN" 2592 else: 2593 for_or_in = None 2594 2595 if self._match_text_seq("ACCESS"): 2596 lock_type = "ACCESS" 2597 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2598 lock_type = "EXCLUSIVE" 2599 elif self._match_text_seq("SHARE"): 2600 lock_type = "SHARE" 2601 elif self._match_text_seq("READ"): 2602 lock_type = "READ" 2603 elif self._match_text_seq("WRITE"): 2604 lock_type = "WRITE" 2605 elif self._match_text_seq("CHECKSUM"): 2606 lock_type = "CHECKSUM" 2607 else: 2608 lock_type = None 2609 2610 override = self._match_text_seq("OVERRIDE") 2611 2612 return self.expression( 2613 exp.LockingProperty, 2614 this=this, 2615 kind=kind, 2616 for_or_in=for_or_in, 2617 lock_type=lock_type, 2618 override=override, 2619 ) 2620 2621 def _parse_partition_by(self) -> t.List[exp.Expression]: 2622 if self._match(TokenType.PARTITION_BY): 2623 return self._parse_csv(self._parse_assignment) 2624 return [] 2625 2626 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2627 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2628 if self._match_text_seq("MINVALUE"): 2629 return exp.var("MINVALUE") 2630 if self._match_text_seq("MAXVALUE"): 2631 return exp.var("MAXVALUE") 2632 return self._parse_bitwise() 2633 2634 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2635 expression = None 2636 from_expressions = None 2637 to_expressions = None 2638 2639 if self._match(TokenType.IN): 2640 this = self._parse_wrapped_csv(self._parse_bitwise) 2641 elif self._match(TokenType.FROM): 2642 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2643 self._match_text_seq("TO") 2644 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2645 elif self._match_text_seq("WITH", "(", "MODULUS"): 2646 this = self._parse_number() 2647 self._match_text_seq(",", "REMAINDER") 2648 expression = self._parse_number() 2649 self._match_r_paren() 2650 else: 2651 self.raise_error("Failed to parse partition bound spec.") 2652 2653 return self.expression( 2654 exp.PartitionBoundSpec, 2655 this=this, 2656 expression=expression, 2657 from_expressions=from_expressions, 2658 to_expressions=to_expressions, 2659 ) 2660 2661 # https://www.postgresql.org/docs/current/sql-createtable.html 2662 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2663 if not self._match_text_seq("OF"): 2664 self._retreat(self._index - 1) 2665 return None 2666 2667 this = self._parse_table(schema=True) 2668 2669 if self._match(TokenType.DEFAULT): 2670 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2671 elif self._match_text_seq("FOR", "VALUES"): 2672 expression = self._parse_partition_bound_spec() 2673 else: 2674 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2675 2676 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2677 2678 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2679 self._match(TokenType.EQ) 2680 return self.expression( 2681 exp.PartitionedByProperty, 2682 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2683 ) 2684 2685 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2686 if self._match_text_seq("AND", "STATISTICS"): 2687 
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )
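    # Illustrative sketch (assumption): the INCLUDING/EXCLUDING options of a
    # Postgres-style LIKE clause are collected by _parse_create_like above.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> create = sqlglot.parse_one("CREATE TABLE t (LIKE s INCLUDING DEFAULTS)", read="postgres")
    # >>> like = create.find(exp.LikeProperty)
    # >>> like.this.name, like.expressions[0].args["value"].name
    # ('s', 'DEFAULTS')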
    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
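    # Illustrative sketch (assumption): a Postgres upsert exercises both _parse_insert
    # and _parse_on_conflict above.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> insert = sqlglot.parse_one(
    # ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
    # ... )
    # >>> conflict = insert.args["conflict"]
    # >>> isinstance(conflict, exp.OnConflict), [k.name for k in conflict.args["conflict_keys"]]
    # (True, ['a'])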
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3004 kwargs["fields"] = self._parse_string() 3005 if self._match_text_seq("ESCAPED", "BY"): 3006 kwargs["escaped"] = self._parse_string() 3007 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3008 kwargs["collection_items"] = self._parse_string() 3009 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3010 kwargs["map_keys"] = self._parse_string() 3011 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3012 kwargs["lines"] = self._parse_string() 3013 if self._match_text_seq("NULL", "DEFINED", "AS"): 3014 kwargs["null"] = self._parse_string() 3015 3016 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3017 3018 def _parse_load(self) -> exp.LoadData | exp.Command: 3019 if self._match_text_seq("DATA"): 3020 local = self._match_text_seq("LOCAL") 3021 self._match_text_seq("INPATH") 3022 inpath = self._parse_string() 3023 overwrite = self._match(TokenType.OVERWRITE) 3024 self._match_pair(TokenType.INTO, TokenType.TABLE) 3025 3026 return self.expression( 3027 exp.LoadData, 3028 this=self._parse_table(schema=True), 3029 local=local, 3030 overwrite=overwrite, 3031 inpath=inpath, 3032 partition=self._parse_partition(), 3033 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3034 serde=self._match_text_seq("SERDE") and self._parse_string(), 3035 ) 3036 return self._parse_as_command(self._prev) 3037 3038 def _parse_delete(self) -> exp.Delete: 3039 # This handles MySQL's "Multiple-Table Syntax" 3040 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3041 tables = None 3042 if not self._match(TokenType.FROM, advance=False): 3043 tables = self._parse_csv(self._parse_table) or None 3044 3045 returning = self._parse_returning() 3046 3047 return self.expression( 3048 exp.Delete, 3049 tables=tables, 3050 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3051 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3052 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3053 where=self._parse_where(), 3054 returning=returning or self._parse_returning(), 3055 limit=self._parse_limit(), 3056 ) 3057 3058 def _parse_update(self) -> exp.Update: 3059 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3060 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3061 returning = self._parse_returning() 3062 return self.expression( 3063 exp.Update, 3064 **{ # type: ignore 3065 "this": this, 3066 "expressions": expressions, 3067 "from": self._parse_from(joins=True), 3068 "where": self._parse_where(), 3069 "returning": returning or self._parse_returning(), 3070 "order": self._parse_order(), 3071 "limit": self._parse_limit(), 3072 }, 3073 ) 3074 3075 def _parse_use(self) -> exp.Use: 3076 return self.expression( 3077 exp.Use, 3078 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3079 this=self._parse_table(schema=False), 3080 ) 3081 3082 def _parse_uncache(self) -> exp.Uncache: 3083 if not self._match(TokenType.TABLE): 3084 self.raise_error("Expecting TABLE after UNCACHE") 3085 3086 return self.expression( 3087 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3088 ) 3089 3090 def _parse_cache(self) -> exp.Cache: 3091 lazy = self._match_text_seq("LAZY") 3092 self._match(TokenType.TABLE) 3093 table = self._parse_table(schema=True) 3094 3095 options = [] 3096 if self._match_text_seq("OPTIONS"): 3097 self._match_l_paren() 3098 k = 
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table()
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. a join) follows
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
            if self._match(TokenType.PIPE_GT, advance=False):
                return self._parse_pipe_syntax_query(this)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte
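    # Illustrative sketch (assumption): _parse_with/_parse_cte attach the CTE list to
    # the statement that follows it, per the "with" arg handling in _parse_select.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> select = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    # >>> with_ = select.args["with"]
    # >>> isinstance(with_, exp.With), with_.expressions[0].alias
    # (True, 'x')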
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and, if that fails, treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                    continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
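    # Illustrative sketch (assumption): in dialects that tokenize optimizer comments
    # as TokenType.HINT (e.g. Spark), _parse_hint above re-parses the comment into an
    # exp.Hint attached to the SELECT.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> select = sqlglot.parse_one("SELECT /*+ BROADCAST(t) */ a FROM t", read="spark")
    # >>> isinstance(select.args.get("hint"), exp.Hint)
    # True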
    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)
        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
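    # Illustrative sketch (assumption): _parse_join_parts splits the join tokens into
    # method/side/kind, which _parse_join stores as plain strings on the exp.Join.
    #
    # >>> import sqlglot
    # >>> select = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    # >>> join = select.args["joins"][0]
    # >>> join.side
    # 'LEFT'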
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
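    # [Editor's note] Illustrative sketch, not original source: for a dotted reference
    # the loop above shifts the parsed parts left into db and catalog, e.g.
    #
    #   t = sqlglot.parse_one("SELECT * FROM c.d.tbl").find(exp.Table)
    #   assert (t.catalog, t.db, t.name) == ("c", "d", "tbl")
    #
    # Anything deeper than three parts keeps nesting into exp.Dot instead.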
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
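    # [Editor's note] Editorial summary, not original source: _parse_table tries each
    # table-factor shape in a fixed order (LATERAL, UNNEST, VALUES, subquery, then a
    # plain reference), so all of these parse through the method above:
    #
    #   sqlglot.parse_one("SELECT * FROM (VALUES (1)) AS v(x)")
    #   sqlglot.parse_one("SELECT * FROM (SELECT 1) AS sub")
    #   sqlglot.parse_one("SELECT * FROM tbl AS t")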
    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
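    # [Editor's note] Hedged example, not original source (assumes the BigQuery
    # dialect): both of these are valid table factors thanks to the helpers above:
    #
    #   sqlglot.parse_one("SELECT x FROM UNNEST([1, 2]) AS x WITH OFFSET", read="bigquery")
    #   sqlglot.parse_one("SELECT * FROM (VALUES (1, 'a')) AS t(id, name)")
    #
    # When no offset alias is given, the identifier `offset` is used as the fallback.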
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )
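    # [Editor's note] Hedged example, not original source: several dialect spellings
    # normalize into a single exp.TableSample node, e.g.
    #
    #   sqlglot.parse_one("SELECT * FROM t TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42)")
    #
    # should yield method=SYSTEM, percent=10 and seed=42; whether a bare number means
    # a percentage or a row count is decided by dialect.TABLESAMPLE_SIZE_IS_PERCENT.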
    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)
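    # [Editor's note] Hedged example, not original source (assumes the DuckDB
    # dialect): the simplified form above targets DuckDB's keyword-only statement,
    #
    #   sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")
    #
    # while _parse_pivot_in serves the standard `FOR col IN (...)` clause used by
    # _parse_pivot below.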
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )
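    # [Editor's note] Hedged example, not original source: _parse_group gathers plain
    # keys, ROLLUP/CUBE and GROUPING SETS into separate args of one exp.Group node:
    #
    #   g = sqlglot.parse_one(
    #       "SELECT a, SUM(b) FROM t GROUP BY a, ROLLUP (a), GROUPING SETS ((a), ())"
    #   ).args["group"]
    #   assert g.args["rollup"] and g.args["grouping_sets"]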
    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
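    # [Editor's note] Hedged example, not original source (assumes the Oracle
    # dialect): PRIOR is registered as a no-paren "function" only for the duration of
    # the CONNECT BY condition, hence the patch/pop around _parse_assignment above:
    #
    #   sqlglot.parse_one(
    #       "SELECT 1 FROM t START WITH id = 1 CONNECT BY PRIOR id = parent_id",
    #       read="oracle",
    #   )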
    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
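    # [Editor's note] Hedged example, not original source: LIMIT, MySQL's
    # `LIMIT offset, count`, T-SQL's TOP and ANSI FETCH all normalize into exp.Limit /
    # exp.Fetch here, which is what enables round-trips such as:
    #
    #   sqlglot.transpile("SELECT TOP 5 * FROM t", read="tsql", write="duckdb")
    #   # expected: ['SELECT * FROM t LIMIT 5']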
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4691 "LOCK", "IN", "SHARE", "MODE" 4692 ): 4693 update = False 4694 else: 4695 break 4696 4697 expressions = None 4698 if self._match_text_seq("OF"): 4699 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4700 4701 wait: t.Optional[bool | exp.Expression] = None 4702 if self._match_text_seq("NOWAIT"): 4703 wait = True 4704 elif self._match_text_seq("WAIT"): 4705 wait = self._parse_primary() 4706 elif self._match_text_seq("SKIP", "LOCKED"): 4707 wait = False 4708 4709 locks.append( 4710 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4711 ) 4712 4713 return locks 4714 4715 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4716 start = self._index 4717 _, side_token, kind_token = self._parse_join_parts() 4718 4719 side = side_token.text if side_token else None 4720 kind = kind_token.text if kind_token else None 4721 4722 if not self._match_set(self.SET_OPERATIONS): 4723 self._retreat(start) 4724 return None 4725 4726 token_type = self._prev.token_type 4727 4728 if token_type == TokenType.UNION: 4729 operation: t.Type[exp.SetOperation] = exp.Union 4730 elif token_type == TokenType.EXCEPT: 4731 operation = exp.Except 4732 else: 4733 operation = exp.Intersect 4734 4735 comments = self._prev.comments 4736 4737 if self._match(TokenType.DISTINCT): 4738 distinct: t.Optional[bool] = True 4739 elif self._match(TokenType.ALL): 4740 distinct = False 4741 else: 4742 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4743 if distinct is None: 4744 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4745 4746 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4747 "STRICT", "CORRESPONDING" 4748 ) 4749 if self._match_text_seq("CORRESPONDING"): 4750 by_name = True 4751 if not side and not kind: 4752 kind = "INNER" 4753 4754 on_column_list = None 4755 if by_name and self._match_texts(("ON", "BY")): 4756 on_column_list = self._parse_wrapped_csv(self._parse_column) 4757 4758 expression = self._parse_select(nested=True, parse_set_operation=False) 4759 4760 return self.expression( 4761 operation, 4762 comments=comments, 4763 this=this, 4764 distinct=distinct, 4765 by_name=by_name, 4766 expression=expression, 4767 side=side, 4768 kind=kind, 4769 on=on_column_list, 4770 ) 4771 4772 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4773 while this: 4774 setop = self.parse_set_operation(this) 4775 if not setop: 4776 break 4777 this = setop 4778 4779 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4780 expression = this.expression 4781 4782 if expression: 4783 for arg in self.SET_OP_MODIFIERS: 4784 expr = expression.args.get(arg) 4785 if expr: 4786 this.set(arg, expr.pop()) 4787 4788 return this 4789 4790 def _parse_expression(self) -> t.Optional[exp.Expression]: 4791 return self._parse_alias(self._parse_assignment()) 4792 4793 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4794 this = self._parse_disjunction() 4795 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4796 # This allows us to parse <non-identifier token> := <expr> 4797 this = exp.column( 4798 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4799 ) 4800 4801 while self._match_set(self.ASSIGNMENT): 4802 if isinstance(this, exp.Column) and len(this.parts) == 1: 4803 this = this.this 4804 4805 this = self.expression( 4806 
    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
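    # [Editor's note] Editorial summary, not original source: the methods above form a
    # classic precedence ladder (assignment -> OR -> AND -> equality -> comparison ->
    # range -> bitwise -> ...), each level delegating to the next-tighter one, so in
    #
    #   sqlglot.parse_one("SELECT * FROM t WHERE a = 1 OR b = 2 AND c = 3")
    #
    # AND binds tighter than OR and the WHERE tree is Or(EQ, And(EQ, EQ)).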
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(
                    exp.In, this=this, query=expressions[0].subquery(copy=False)
                )
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
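    # [Editor's note] Hedged example, not original source: _parse_in decides between a
    # value list and a subquery by inspecting what the parenthesized parse produced:
    #
    #   sqlglot.parse_one("SELECT * FROM t WHERE x IN (1, 2)")             # expressions
    #   sqlglot.parse_one("SELECT * FROM t WHERE x IN (SELECT y FROM u)")  # query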
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
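    # [Editor's note] Hedged example, not original source: dialect flags steer the
    # operator loop in _parse_bitwise, e.g. `||` only becomes exp.DPipe when
    # DPIPE_IS_STRING_CONCAT is set, and `<<` is assembled from an LT/LT token pair:
    #
    #   sqlglot.parse_one("SELECT 1 << 2")  # -> exp.BitwiseLeftShift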
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # _parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
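    # [Editor's note] Hedged example, not original source: the Literal branch above is
    # what turns typed literals into casts, e.g.
    #
    #   sqlglot.parse_one("SELECT DATE '2020-01-01'")
    #
    # parses the string as CAST('2020-01-01' AS DATE) unless a TYPE_LITERAL_PARSERS
    # entry overrides the construction for that type.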
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
"ZONE"): 5275 maybe_func = False 5276 tz_type = ( 5277 exp.DataType.Type.TIMETZ 5278 if type_token in self.TIMES 5279 else exp.DataType.Type.TIMESTAMPTZ 5280 ) 5281 this = exp.DataType(this=tz_type, expressions=expressions) 5282 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5283 maybe_func = False 5284 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5285 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5286 maybe_func = False 5287 elif type_token == TokenType.INTERVAL: 5288 unit = self._parse_var(upper=True) 5289 if unit: 5290 if self._match_text_seq("TO"): 5291 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5292 5293 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5294 else: 5295 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5296 elif type_token == TokenType.VOID: 5297 this = exp.DataType(this=exp.DataType.Type.NULL) 5298 5299 if maybe_func and check_func: 5300 index2 = self._index 5301 peek = self._parse_string() 5302 5303 if not peek: 5304 self._retreat(index) 5305 return None 5306 5307 self._retreat(index2) 5308 5309 if not this: 5310 if self._match_text_seq("UNSIGNED"): 5311 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5312 if not unsigned_type_token: 5313 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5314 5315 type_token = unsigned_type_token or type_token 5316 5317 this = exp.DataType( 5318 this=exp.DataType.Type[type_token.value], 5319 expressions=expressions, 5320 nested=nested, 5321 prefix=prefix, 5322 ) 5323 5324 # Empty arrays/structs are allowed 5325 if values is not None: 5326 cls = exp.Struct if is_struct else exp.Array 5327 this = exp.cast(cls(expressions=values), this, copy=False) 5328 5329 elif expressions: 5330 this.set("expressions", expressions) 5331 5332 # https://materialize.com/docs/sql/types/list/#type-name 5333 while self._match(TokenType.LIST): 5334 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5335 5336 index = self._index 5337 5338 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5339 matched_array = self._match(TokenType.ARRAY) 5340 5341 while self._curr: 5342 datatype_token = self._prev.token_type 5343 matched_l_bracket = self._match(TokenType.L_BRACKET) 5344 5345 if (not matched_l_bracket and not matched_array) or ( 5346 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5347 ): 5348 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5349 # not to be confused with the fixed size array parsing 5350 break 5351 5352 matched_array = False 5353 values = self._parse_csv(self._parse_assignment) or None 5354 if ( 5355 values 5356 and not schema 5357 and ( 5358 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5359 ) 5360 ): 5361 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
                # Retreating here means that we should not parse the following values as part of
                # the data type, e.g. in DuckDB ARRAY[1] should retreat and instead be parsed into
                # exp.Array, in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
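    # [Editor's note] Hedged example, not original source (assumes the Postgres
    # dialect): the bracket loop at the end of _parse_types is what separates a
    # fixed-size array type from an array literal:
    #
    #   sqlglot.parse_one("SELECT CAST(x AS INT[3])", read="postgres")  # DataType ARRAY
    #   sqlglot.parse_one("SELECT ARRAY[1, 2]", read="postgres")        # exp.Array value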
    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        return self._parse_types()
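    # [Editor's note] Hedged example, not original source (assumes the Snowflake
    # dialect): the rewrite above is why VARIANT paths round-trip through GET_PATH,
    # with any trailing :: casts re-applied on top of the extraction:
    #
    #   sqlglot.transpile("SELECT col:a.b::string FROM t", read="snowflake", write="snowflake")
    #   # expected: something like ["SELECT CAST(GET_PATH(col, 'a.b') AS VARCHAR) FROM t"]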
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BQ & Snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)
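    # [Editor's note] Hedged example, not original source: the STRING loop in
    # _parse_primary folds adjacent string literals into a single concatenation, so
    #
    #   sqlglot.parse_one("SELECT 'foo' 'bar'", read="mysql")
    #
    # produces exp.Concat over the two literals rather than a syntax error.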
bool = True, 5630 any_token: bool = False, 5631 ) -> t.Optional[exp.Expression]: 5632 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5633 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5634 fn_syntax = False 5635 if ( 5636 self._match(TokenType.L_BRACE, advance=False) 5637 and self._next 5638 and self._next.text.upper() == "FN" 5639 ): 5640 self._advance(2) 5641 fn_syntax = True 5642 5643 func = self._parse_function_call( 5644 functions=functions, 5645 anonymous=anonymous, 5646 optional_parens=optional_parens, 5647 any_token=any_token, 5648 ) 5649 5650 if fn_syntax: 5651 self._match(TokenType.R_BRACE) 5652 5653 return func 5654 5655 def _parse_function_call( 5656 self, 5657 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5658 anonymous: bool = False, 5659 optional_parens: bool = True, 5660 any_token: bool = False, 5661 ) -> t.Optional[exp.Expression]: 5662 if not self._curr: 5663 return None 5664 5665 comments = self._curr.comments 5666 token = self._curr 5667 token_type = self._curr.token_type 5668 this = self._curr.text 5669 upper = this.upper() 5670 5671 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5672 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5673 self._advance() 5674 return self._parse_window(parser(self)) 5675 5676 if not self._next or self._next.token_type != TokenType.L_PAREN: 5677 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5678 self._advance() 5679 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5680 5681 return None 5682 5683 if any_token: 5684 if token_type in self.RESERVED_TOKENS: 5685 return None 5686 elif token_type not in self.FUNC_TOKENS: 5687 return None 5688 5689 self._advance(2) 5690 5691 parser = self.FUNCTION_PARSERS.get(upper) 5692 if parser and not anonymous: 5693 this = parser(self) 5694 else: 5695 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5696 5697 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5698 this = self.expression( 5699 subquery_predicate, comments=comments, this=self._parse_select() 5700 ) 5701 self._match_r_paren() 5702 return this 5703 5704 if functions is None: 5705 functions = self.FUNCTIONS 5706 5707 function = functions.get(upper) 5708 known_function = function and not anonymous 5709 5710 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5711 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5712 5713 post_func_comments = self._curr and self._curr.comments 5714 if known_function and post_func_comments: 5715 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5716 # call we'll construct it as exp.Anonymous, even if it's "known" 5717 if any( 5718 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5719 for comment in post_func_comments 5720 ): 5721 known_function = False 5722 5723 if alias and known_function: 5724 args = self._kv_to_prop_eq(args) 5725 5726 if known_function: 5727 func_builder = t.cast(t.Callable, function) 5728 5729 if "dialect" in func_builder.__code__.co_varnames: 5730 func = func_builder(args, dialect=self.dialect) 5731 else: 5732 func = func_builder(args) 5733 5734 func = self.validate_expression(func, args) 5735 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5736 func.meta["name"] = this 5737 5738 this = func 5739 else: 5740 if token_type == TokenType.IDENTIFIER: 5741 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5742 5743 this = 
self.expression(exp.Anonymous, this=this, expressions=args) 5744 this = this.update_positions(token) 5745 5746 if isinstance(this, exp.Expression): 5747 this.add_comments(comments) 5748 5749 self._match_r_paren(this) 5750 return self._parse_window(this) 5751 5752 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5753 return expression 5754 5755 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5756 transformed = [] 5757 5758 for index, e in enumerate(expressions): 5759 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5760 if isinstance(e, exp.Alias): 5761 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5762 5763 if not isinstance(e, exp.PropertyEQ): 5764 e = self.expression( 5765 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5766 ) 5767 5768 if isinstance(e.this, exp.Column): 5769 e.this.replace(e.this.this) 5770 else: 5771 e = self._to_prop_eq(e, index) 5772 5773 transformed.append(e) 5774 5775 return transformed 5776 5777 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5778 return self._parse_statement() 5779 5780 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5781 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5782 5783 def _parse_user_defined_function( 5784 self, kind: t.Optional[TokenType] = None 5785 ) -> t.Optional[exp.Expression]: 5786 this = self._parse_table_parts(schema=True) 5787 5788 if not self._match(TokenType.L_PAREN): 5789 return this 5790 5791 expressions = self._parse_csv(self._parse_function_parameter) 5792 self._match_r_paren() 5793 return self.expression( 5794 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5795 ) 5796 5797 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5798 literal = self._parse_primary() 5799 if literal: 5800 return self.expression(exp.Introducer, this=token.text, expression=literal) 5801 5802 return self._identifier_expression(token) 5803 5804 def _parse_session_parameter(self) -> exp.SessionParameter: 5805 kind = None 5806 this = self._parse_id_var() or self._parse_primary() 5807 5808 if this and self._match(TokenType.DOT): 5809 kind = this.name 5810 this = self._parse_var() or self._parse_primary() 5811 5812 return self.expression(exp.SessionParameter, this=this, kind=kind) 5813 5814 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5815 return self._parse_id_var() 5816 5817 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5818 index = self._index 5819 5820 if self._match(TokenType.L_PAREN): 5821 expressions = t.cast( 5822 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5823 ) 5824 5825 if not self._match(TokenType.R_PAREN): 5826 self._retreat(index) 5827 else: 5828 expressions = [self._parse_lambda_arg()] 5829 5830 if self._match_set(self.LAMBDAS): 5831 return self.LAMBDAS[self._prev.token_type](self, expressions) 5832 5833 self._retreat(index) 5834 5835 this: t.Optional[exp.Expression] 5836 5837 if self._match(TokenType.DISTINCT): 5838 this = self.expression( 5839 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5840 ) 5841 else: 5842 this = self._parse_select_or_expression(alias=alias) 5843 5844 return self._parse_limit( 5845 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5846 ) 5847 5848 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> 
t.Optional[exp.Expression]: 5849 index = self._index 5850 if not self._match(TokenType.L_PAREN): 5851 return this 5852 5853 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5854 # expr can be of both types 5855 if self._match_set(self.SELECT_START_TOKENS): 5856 self._retreat(index) 5857 return this 5858 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5859 self._match_r_paren() 5860 return self.expression(exp.Schema, this=this, expressions=args) 5861 5862 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5863 return self._parse_column_def(self._parse_field(any_token=True)) 5864 5865 def _parse_column_def( 5866 self, this: t.Optional[exp.Expression], computed_column: bool = True 5867 ) -> t.Optional[exp.Expression]: 5868 # column defs are not really columns, they're identifiers 5869 if isinstance(this, exp.Column): 5870 this = this.this 5871 5872 if not computed_column: 5873 self._match(TokenType.ALIAS) 5874 5875 kind = self._parse_types(schema=True) 5876 5877 if self._match_text_seq("FOR", "ORDINALITY"): 5878 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5879 5880 constraints: t.List[exp.Expression] = [] 5881 5882 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5883 ("ALIAS", "MATERIALIZED") 5884 ): 5885 persisted = self._prev.text.upper() == "MATERIALIZED" 5886 constraint_kind = exp.ComputedColumnConstraint( 5887 this=self._parse_assignment(), 5888 persisted=persisted or self._match_text_seq("PERSISTED"), 5889 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5890 ) 5891 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5892 elif ( 5893 kind 5894 and self._match(TokenType.ALIAS, advance=False) 5895 and ( 5896 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5897 or (self._next and self._next.token_type == TokenType.L_PAREN) 5898 ) 5899 ): 5900 self._advance() 5901 constraints.append( 5902 self.expression( 5903 exp.ColumnConstraint, 5904 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5905 ) 5906 ) 5907 5908 while True: 5909 constraint = self._parse_column_constraint() 5910 if not constraint: 5911 break 5912 constraints.append(constraint) 5913 5914 if not kind and not constraints: 5915 return this 5916 5917 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5918 5919 def _parse_auto_increment( 5920 self, 5921 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5922 start = None 5923 increment = None 5924 5925 if self._match(TokenType.L_PAREN, advance=False): 5926 args = self._parse_wrapped_csv(self._parse_bitwise) 5927 start = seq_get(args, 0) 5928 increment = seq_get(args, 1) 5929 elif self._match_text_seq("START"): 5930 start = self._parse_bitwise() 5931 self._match_text_seq("INCREMENT") 5932 increment = self._parse_bitwise() 5933 5934 if start and increment: 5935 return exp.GeneratedAsIdentityColumnConstraint( 5936 start=start, increment=increment, this=False 5937 ) 5938 5939 return exp.AutoIncrementColumnConstraint() 5940 5941 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5942 if not self._match_text_seq("REFRESH"): 5943 self._retreat(self._index - 1) 5944 return None 5945 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5946 5947 def _parse_compress(self) -> exp.CompressColumnConstraint: 5948 if self._match(TokenType.L_PAREN, advance=False): 5949 return self.expression( 5950 
exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5951 ) 5952 5953 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5954 5955 def _parse_generated_as_identity( 5956 self, 5957 ) -> ( 5958 exp.GeneratedAsIdentityColumnConstraint 5959 | exp.ComputedColumnConstraint 5960 | exp.GeneratedAsRowColumnConstraint 5961 ): 5962 if self._match_text_seq("BY", "DEFAULT"): 5963 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5964 this = self.expression( 5965 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5966 ) 5967 else: 5968 self._match_text_seq("ALWAYS") 5969 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5970 5971 self._match(TokenType.ALIAS) 5972 5973 if self._match_text_seq("ROW"): 5974 start = self._match_text_seq("START") 5975 if not start: 5976 self._match(TokenType.END) 5977 hidden = self._match_text_seq("HIDDEN") 5978 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5979 5980 identity = self._match_text_seq("IDENTITY") 5981 5982 if self._match(TokenType.L_PAREN): 5983 if self._match(TokenType.START_WITH): 5984 this.set("start", self._parse_bitwise()) 5985 if self._match_text_seq("INCREMENT", "BY"): 5986 this.set("increment", self._parse_bitwise()) 5987 if self._match_text_seq("MINVALUE"): 5988 this.set("minvalue", self._parse_bitwise()) 5989 if self._match_text_seq("MAXVALUE"): 5990 this.set("maxvalue", self._parse_bitwise()) 5991 5992 if self._match_text_seq("CYCLE"): 5993 this.set("cycle", True) 5994 elif self._match_text_seq("NO", "CYCLE"): 5995 this.set("cycle", False) 5996 5997 if not identity: 5998 this.set("expression", self._parse_range()) 5999 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6000 args = self._parse_csv(self._parse_bitwise) 6001 this.set("start", seq_get(args, 0)) 6002 this.set("increment", seq_get(args, 1)) 6003 6004 self._match_r_paren() 6005 6006 return this 6007 6008 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6009 self._match_text_seq("LENGTH") 6010 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6011 6012 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6013 if self._match_text_seq("NULL"): 6014 return self.expression(exp.NotNullColumnConstraint) 6015 if self._match_text_seq("CASESPECIFIC"): 6016 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6017 if self._match_text_seq("FOR", "REPLICATION"): 6018 return self.expression(exp.NotForReplicationColumnConstraint) 6019 6020 # Unconsume the `NOT` token 6021 self._retreat(self._index - 1) 6022 return None 6023 6024 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6025 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6026 6027 procedure_option_follows = ( 6028 self._match(TokenType.WITH, advance=False) 6029 and self._next 6030 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6031 ) 6032 6033 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6034 return self.expression( 6035 exp.ColumnConstraint, 6036 this=this, 6037 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6038 ) 6039 6040 return this 6041 6042 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6043 if not self._match(TokenType.CONSTRAINT): 6044 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6045 6046 return self.expression( 6047 exp.Constraint, 6048 
this=self._parse_id_var(), 6049 expressions=self._parse_unnamed_constraints(), 6050 ) 6051 6052 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6053 constraints = [] 6054 while True: 6055 constraint = self._parse_unnamed_constraint() or self._parse_function() 6056 if not constraint: 6057 break 6058 constraints.append(constraint) 6059 6060 return constraints 6061 6062 def _parse_unnamed_constraint( 6063 self, constraints: t.Optional[t.Collection[str]] = None 6064 ) -> t.Optional[exp.Expression]: 6065 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6066 constraints or self.CONSTRAINT_PARSERS 6067 ): 6068 return None 6069 6070 constraint = self._prev.text.upper() 6071 if constraint not in self.CONSTRAINT_PARSERS: 6072 self.raise_error(f"No parser found for schema constraint {constraint}.") 6073 6074 return self.CONSTRAINT_PARSERS[constraint](self) 6075 6076 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6077 return self._parse_id_var(any_token=False) 6078 6079 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6080 self._match_text_seq("KEY") 6081 return self.expression( 6082 exp.UniqueColumnConstraint, 6083 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6084 this=self._parse_schema(self._parse_unique_key()), 6085 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6086 on_conflict=self._parse_on_conflict(), 6087 options=self._parse_key_constraint_options(), 6088 ) 6089 6090 def _parse_key_constraint_options(self) -> t.List[str]: 6091 options = [] 6092 while True: 6093 if not self._curr: 6094 break 6095 6096 if self._match(TokenType.ON): 6097 action = None 6098 on = self._advance_any() and self._prev.text 6099 6100 if self._match_text_seq("NO", "ACTION"): 6101 action = "NO ACTION" 6102 elif self._match_text_seq("CASCADE"): 6103 action = "CASCADE" 6104 elif self._match_text_seq("RESTRICT"): 6105 action = "RESTRICT" 6106 elif self._match_pair(TokenType.SET, TokenType.NULL): 6107 action = "SET NULL" 6108 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6109 action = "SET DEFAULT" 6110 else: 6111 self.raise_error("Invalid key constraint") 6112 6113 options.append(f"ON {on} {action}") 6114 else: 6115 var = self._parse_var_from_options( 6116 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6117 ) 6118 if not var: 6119 break 6120 options.append(var.name) 6121 6122 return options 6123 6124 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6125 if match and not self._match(TokenType.REFERENCES): 6126 return None 6127 6128 expressions = None 6129 this = self._parse_table(schema=True) 6130 options = self._parse_key_constraint_options() 6131 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6132 6133 def _parse_foreign_key(self) -> exp.ForeignKey: 6134 expressions = ( 6135 self._parse_wrapped_id_vars() 6136 if not self._match(TokenType.REFERENCES, advance=False) 6137 else None 6138 ) 6139 reference = self._parse_references() 6140 on_options = {} 6141 6142 while self._match(TokenType.ON): 6143 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6144 self.raise_error("Expected DELETE or UPDATE") 6145 6146 kind = self._prev.text.lower() 6147 6148 if self._match_text_seq("NO", "ACTION"): 6149 action = "NO ACTION" 6150 elif self._match(TokenType.SET): 6151 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6152 action = "SET " + self._prev.text.upper() 6153 else: 6154 self._advance() 6155 action = 
self._prev.text.upper()
6156
6157            on_options[kind] = action
6158
6159        return self.expression(
6160            exp.ForeignKey,
6161            expressions=expressions,
6162            reference=reference,
6163            options=self._parse_key_constraint_options(),
6164            **on_options,  # type: ignore
6165        )
6166
6167    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
6168        return self._parse_ordered() or self._parse_field()
6169
6170    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
6171        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
6172            self._retreat(self._index - 1)
6173            return None
6174
6175        id_vars = self._parse_wrapped_id_vars()
6176        return self.expression(
6177            exp.PeriodForSystemTimeConstraint,
6178            this=seq_get(id_vars, 0),
6179            expression=seq_get(id_vars, 1),
6180        )
6181
6182    def _parse_primary_key(
6183        self, wrapped_optional: bool = False, in_props: bool = False
6184    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
6185        desc = (
6186            self._match_set((TokenType.ASC, TokenType.DESC))
6187            and self._prev.token_type == TokenType.DESC
6188        )
6189
6190        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
6191            return self.expression(
6192                exp.PrimaryKeyColumnConstraint,
6193                desc=desc,
6194                options=self._parse_key_constraint_options(),
6195            )
6196
6197        expressions = self._parse_wrapped_csv(
6198            self._parse_primary_key_part, optional=wrapped_optional
6199        )
6200        options = self._parse_key_constraint_options()
6201        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
6202
6203    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
6204        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
6205
6206    def _parse_odbc_datetime_literal(self) -> exp.Expression:
6207        """
6208        Parses a datetime literal in ODBC format. The literal is parsed into the
6209        corresponding expression type; for example `{d'yyyy-mm-dd'}` is parsed into a
6210        `Date` node, exactly as `DATE('yyyy-mm-dd')` would be.
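        Example (illustrative sketch; the dialect choice and the exact node shape are
        assumptions, not pinned behavior):

            >>> import sqlglot
            >>> expr = sqlglot.parse_one("SELECT {d '2024-01-31'}", read="mysql")
            >>> isinstance(expr.selects[0], sqlglot.exp.Date)
            True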
6211 6212 Reference: 6213 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6214 """ 6215 self._match(TokenType.VAR) 6216 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6217 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6218 if not self._match(TokenType.R_BRACE): 6219 self.raise_error("Expected }") 6220 return expression 6221 6222 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6223 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6224 return this 6225 6226 bracket_kind = self._prev.token_type 6227 if ( 6228 bracket_kind == TokenType.L_BRACE 6229 and self._curr 6230 and self._curr.token_type == TokenType.VAR 6231 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6232 ): 6233 return self._parse_odbc_datetime_literal() 6234 6235 expressions = self._parse_csv( 6236 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6237 ) 6238 6239 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6240 self.raise_error("Expected ]") 6241 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6242 self.raise_error("Expected }") 6243 6244 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6245 if bracket_kind == TokenType.L_BRACE: 6246 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6247 elif not this: 6248 this = build_array_constructor( 6249 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6250 ) 6251 else: 6252 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6253 if constructor_type: 6254 return build_array_constructor( 6255 constructor_type, 6256 args=expressions, 6257 bracket_kind=bracket_kind, 6258 dialect=self.dialect, 6259 ) 6260 6261 expressions = apply_index_offset( 6262 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6263 ) 6264 this = self.expression( 6265 exp.Bracket, 6266 this=this, 6267 expressions=expressions, 6268 comments=this.pop_comments(), 6269 ) 6270 6271 self._add_comments(this) 6272 return self._parse_bracket(this) 6273 6274 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6275 if self._match(TokenType.COLON): 6276 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6277 return this 6278 6279 def _parse_case(self) -> t.Optional[exp.Expression]: 6280 ifs = [] 6281 default = None 6282 6283 comments = self._prev_comments 6284 expression = self._parse_assignment() 6285 6286 while self._match(TokenType.WHEN): 6287 this = self._parse_assignment() 6288 self._match(TokenType.THEN) 6289 then = self._parse_assignment() 6290 ifs.append(self.expression(exp.If, this=this, true=then)) 6291 6292 if self._match(TokenType.ELSE): 6293 default = self._parse_assignment() 6294 6295 if not self._match(TokenType.END): 6296 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6297 default = exp.column("interval") 6298 else: 6299 self.raise_error("Expected END after CASE", self._prev) 6300 6301 return self.expression( 6302 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6303 ) 6304 6305 def _parse_if(self) -> t.Optional[exp.Expression]: 6306 if self._match(TokenType.L_PAREN): 6307 args = self._parse_csv( 6308 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6309 ) 6310 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6311 self._match_r_paren() 6312 else: 6313 index = self._index - 1 6314 6315 if self.NO_PAREN_IF_COMMANDS and index == 0: 6316 return self._parse_as_command(self._prev) 6317 6318 condition = self._parse_assignment() 6319 6320 if not condition: 6321 self._retreat(index) 6322 return None 6323 6324 self._match(TokenType.THEN) 6325 true = self._parse_assignment() 6326 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6327 self._match(TokenType.END) 6328 this = self.expression(exp.If, this=condition, true=true, false=false) 6329 6330 return this 6331 6332 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6333 if not self._match_text_seq("VALUE", "FOR"): 6334 self._retreat(self._index - 1) 6335 return None 6336 6337 return self.expression( 6338 exp.NextValueFor, 6339 this=self._parse_column(), 6340 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6341 ) 6342 6343 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6344 this = self._parse_function() or self._parse_var_or_string(upper=True) 6345 6346 if self._match(TokenType.FROM): 6347 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6348 6349 if not self._match(TokenType.COMMA): 6350 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6351 6352 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6353 6354 def _parse_gap_fill(self) -> exp.GapFill: 6355 self._match(TokenType.TABLE) 6356 this = self._parse_table() 6357 6358 self._match(TokenType.COMMA) 6359 args = [this, *self._parse_csv(self._parse_lambda)] 6360 6361 gap_fill = exp.GapFill.from_arg_list(args) 6362 return self.validate_expression(gap_fill, args) 6363 6364 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6365 this = self._parse_assignment() 6366 6367 if not self._match(TokenType.ALIAS): 6368 if self._match(TokenType.COMMA): 6369 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6370 6371 self.raise_error("Expected AS after CAST") 6372 6373 fmt = None 6374 to = self._parse_types() 6375 6376 default = self._match(TokenType.DEFAULT) 6377 if default: 6378 default = self._parse_bitwise() 6379 self._match_text_seq("ON", "CONVERSION", "ERROR") 6380 6381 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6382 fmt_string = self._parse_string() 6383 fmt = self._parse_at_time_zone(fmt_string) 6384 6385 if not to: 6386 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6387 if to.this in exp.DataType.TEMPORAL_TYPES: 6388 this = self.expression( 6389 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6390 this=this, 6391 format=exp.Literal.string( 6392 format_time( 6393 fmt_string.this if fmt_string else "", 6394 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6395 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6396 ) 6397 ), 6398 safe=safe, 6399 ) 6400 6401 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6402 this.set("zone", fmt.args["zone"]) 6403 return this 6404 elif not to: 6405 self.raise_error("Expected TYPE after CAST") 6406 elif isinstance(to, exp.Identifier): 6407 to = exp.DataType.build(to.name, udt=True) 6408 elif to.this == exp.DataType.Type.CHAR: 6409 if self._match(TokenType.CHARACTER_SET): 6410 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6411 6412 return self.expression( 6413 exp.Cast if strict else exp.TryCast, 6414 
this=this,
6415            to=to,
6416            format=fmt,
6417            safe=safe,
6418            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
6419            default=default,
6420        )
6421
6422    def _parse_string_agg(self) -> exp.GroupConcat:
6423        if self._match(TokenType.DISTINCT):
6424            args: t.List[t.Optional[exp.Expression]] = [
6425                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
6426            ]
6427            if self._match(TokenType.COMMA):
6428                args.extend(self._parse_csv(self._parse_assignment))
6429        else:
6430            args = self._parse_csv(self._parse_assignment)  # type: ignore
6431
6432        if self._match_text_seq("ON", "OVERFLOW"):
6433            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
6434            if self._match_text_seq("ERROR"):
6435                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
6436            else:
6437                self._match_text_seq("TRUNCATE")
6438                on_overflow = self.expression(
6439                    exp.OverflowTruncateBehavior,
6440                    this=self._parse_string(),
6441                    with_count=(
6442                        self._match_text_seq("WITH", "COUNT")
6443                        or not self._match_text_seq("WITHOUT", "COUNT")
6444                    ),
6445                )
6446        else:
6447            on_overflow = None
6448
6449        index = self._index
6450        if not self._match(TokenType.R_PAREN) and args:
6451            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
6452            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
6453            # The ORDER BY clause is parsed into `this`, as a canonicalization of the WITHIN GROUP form
6454            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
6455            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))
6456
6457        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
6458        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
6459        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
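        # Example (illustrative sketch, not part of the parser): because all of these
        # spellings land on the canonical exp.GroupConcat node, one dialect's form can
        # be re-generated in another's; the output string is indicative and may vary
        # across sqlglot versions:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile(
        #     ...     "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t",
        #     ...     read="postgres",
        #     ...     write="mysql",
        #     ... )
        #     ["SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t"]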
6460 if not self._match_text_seq("WITHIN", "GROUP"): 6461 self._retreat(index) 6462 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6463 6464 # The corresponding match_r_paren will be called in parse_function (caller) 6465 self._match_l_paren() 6466 6467 return self.expression( 6468 exp.GroupConcat, 6469 this=self._parse_order(this=seq_get(args, 0)), 6470 separator=seq_get(args, 1), 6471 on_overflow=on_overflow, 6472 ) 6473 6474 def _parse_convert( 6475 self, strict: bool, safe: t.Optional[bool] = None 6476 ) -> t.Optional[exp.Expression]: 6477 this = self._parse_bitwise() 6478 6479 if self._match(TokenType.USING): 6480 to: t.Optional[exp.Expression] = self.expression( 6481 exp.CharacterSet, this=self._parse_var() 6482 ) 6483 elif self._match(TokenType.COMMA): 6484 to = self._parse_types() 6485 else: 6486 to = None 6487 6488 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6489 6490 def _parse_xml_table(self) -> exp.XMLTable: 6491 namespaces = None 6492 passing = None 6493 columns = None 6494 6495 if self._match_text_seq("XMLNAMESPACES", "("): 6496 namespaces = self._parse_xml_namespace() 6497 self._match_text_seq(")", ",") 6498 6499 this = self._parse_string() 6500 6501 if self._match_text_seq("PASSING"): 6502 # The BY VALUE keywords are optional and are provided for semantic clarity 6503 self._match_text_seq("BY", "VALUE") 6504 passing = self._parse_csv(self._parse_column) 6505 6506 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6507 6508 if self._match_text_seq("COLUMNS"): 6509 columns = self._parse_csv(self._parse_field_def) 6510 6511 return self.expression( 6512 exp.XMLTable, 6513 this=this, 6514 namespaces=namespaces, 6515 passing=passing, 6516 columns=columns, 6517 by_ref=by_ref, 6518 ) 6519 6520 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6521 namespaces = [] 6522 6523 while True: 6524 if self._match(TokenType.DEFAULT): 6525 uri = self._parse_string() 6526 else: 6527 uri = self._parse_alias(self._parse_string()) 6528 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6529 if not self._match(TokenType.COMMA): 6530 break 6531 6532 return namespaces 6533 6534 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6535 """ 6536 There are generally two variants of the DECODE function: 6537 6538 - DECODE(bin, charset) 6539 - DECODE(expression, search, result [, search, result] ... [, default]) 6540 6541 The second variant will always be parsed into a CASE expression. Note that NULL 6542 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6543 instead of relying on pattern matching. 
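        Example (illustrative; the generated SQL is indicative):

            >>> import sqlglot
            >>> sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle").sql()
            "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t"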
6544 """ 6545 args = self._parse_csv(self._parse_assignment) 6546 6547 if len(args) < 3: 6548 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6549 6550 expression, *expressions = args 6551 if not expression: 6552 return None 6553 6554 ifs = [] 6555 for search, result in zip(expressions[::2], expressions[1::2]): 6556 if not search or not result: 6557 return None 6558 6559 if isinstance(search, exp.Literal): 6560 ifs.append( 6561 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6562 ) 6563 elif isinstance(search, exp.Null): 6564 ifs.append( 6565 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6566 ) 6567 else: 6568 cond = exp.or_( 6569 exp.EQ(this=expression.copy(), expression=search), 6570 exp.and_( 6571 exp.Is(this=expression.copy(), expression=exp.Null()), 6572 exp.Is(this=search.copy(), expression=exp.Null()), 6573 copy=False, 6574 ), 6575 copy=False, 6576 ) 6577 ifs.append(exp.If(this=cond, true=result)) 6578 6579 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6580 6581 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6582 self._match_text_seq("KEY") 6583 key = self._parse_column() 6584 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6585 self._match_text_seq("VALUE") 6586 value = self._parse_bitwise() 6587 6588 if not key and not value: 6589 return None 6590 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6591 6592 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6593 if not this or not self._match_text_seq("FORMAT", "JSON"): 6594 return this 6595 6596 return self.expression(exp.FormatJson, this=this) 6597 6598 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6599 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6600 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6601 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6602 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6603 else: 6604 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6605 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6606 6607 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6608 6609 if not empty and not error and not null: 6610 return None 6611 6612 return self.expression( 6613 exp.OnCondition, 6614 empty=empty, 6615 error=error, 6616 null=null, 6617 ) 6618 6619 def _parse_on_handling( 6620 self, on: str, *values: str 6621 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6622 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6623 for value in values: 6624 if self._match_text_seq(value, "ON", on): 6625 return f"{value} ON {on}" 6626 6627 index = self._index 6628 if self._match(TokenType.DEFAULT): 6629 default_value = self._parse_bitwise() 6630 if self._match_text_seq("ON", on): 6631 return default_value 6632 6633 self._retreat(index) 6634 6635 return None 6636 6637 @t.overload 6638 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6639 6640 @t.overload 6641 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
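    # Example (illustrative sketch, assuming Oracle's KEY ... VALUE syntax): the
    # SQL-standard options are captured as plain args on the resulting node; the
    # exact arg value shown is indicative:
    #
    #     >>> import sqlglot
    #     >>> call = sqlglot.parse_one(
    #     ...     "SELECT JSON_OBJECT(KEY 'a' VALUE 1 ABSENT ON NULL)", read="oracle"
    #     ... ).selects[0]
    #     >>> call.args.get("null_handling")
    #     'ABSENT ON NULL'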
6642 6643 def _parse_json_object(self, agg=False): 6644 star = self._parse_star() 6645 expressions = ( 6646 [star] 6647 if star 6648 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6649 ) 6650 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6651 6652 unique_keys = None 6653 if self._match_text_seq("WITH", "UNIQUE"): 6654 unique_keys = True 6655 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6656 unique_keys = False 6657 6658 self._match_text_seq("KEYS") 6659 6660 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6661 self._parse_type() 6662 ) 6663 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6664 6665 return self.expression( 6666 exp.JSONObjectAgg if agg else exp.JSONObject, 6667 expressions=expressions, 6668 null_handling=null_handling, 6669 unique_keys=unique_keys, 6670 return_type=return_type, 6671 encoding=encoding, 6672 ) 6673 6674 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6675 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6676 if not self._match_text_seq("NESTED"): 6677 this = self._parse_id_var() 6678 kind = self._parse_types(allow_identifiers=False) 6679 nested = None 6680 else: 6681 this = None 6682 kind = None 6683 nested = True 6684 6685 path = self._match_text_seq("PATH") and self._parse_string() 6686 nested_schema = nested and self._parse_json_schema() 6687 6688 return self.expression( 6689 exp.JSONColumnDef, 6690 this=this, 6691 kind=kind, 6692 path=path, 6693 nested_schema=nested_schema, 6694 ) 6695 6696 def _parse_json_schema(self) -> exp.JSONSchema: 6697 self._match_text_seq("COLUMNS") 6698 return self.expression( 6699 exp.JSONSchema, 6700 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6701 ) 6702 6703 def _parse_json_table(self) -> exp.JSONTable: 6704 this = self._parse_format_json(self._parse_bitwise()) 6705 path = self._match(TokenType.COMMA) and self._parse_string() 6706 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6707 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6708 schema = self._parse_json_schema() 6709 6710 return exp.JSONTable( 6711 this=this, 6712 schema=schema, 6713 path=path, 6714 error_handling=error_handling, 6715 empty_handling=empty_handling, 6716 ) 6717 6718 def _parse_match_against(self) -> exp.MatchAgainst: 6719 expressions = self._parse_csv(self._parse_column) 6720 6721 self._match_text_seq(")", "AGAINST", "(") 6722 6723 this = self._parse_string() 6724 6725 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6726 modifier = "IN NATURAL LANGUAGE MODE" 6727 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6728 modifier = f"{modifier} WITH QUERY EXPANSION" 6729 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6730 modifier = "IN BOOLEAN MODE" 6731 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6732 modifier = "WITH QUERY EXPANSION" 6733 else: 6734 modifier = None 6735 6736 return self.expression( 6737 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6738 ) 6739 6740 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6741 def _parse_open_json(self) -> exp.OpenJSON: 6742 this = self._parse_bitwise() 6743 path = self._match(TokenType.COMMA) and self._parse_string() 6744 6745 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6746 this = self._parse_field(any_token=True) 6747 kind = self._parse_types() 6748 path = 
self._parse_string() 6749 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6750 6751 return self.expression( 6752 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6753 ) 6754 6755 expressions = None 6756 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6757 self._match_l_paren() 6758 expressions = self._parse_csv(_parse_open_json_column_def) 6759 6760 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6761 6762 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6763 args = self._parse_csv(self._parse_bitwise) 6764 6765 if self._match(TokenType.IN): 6766 return self.expression( 6767 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6768 ) 6769 6770 if haystack_first: 6771 haystack = seq_get(args, 0) 6772 needle = seq_get(args, 1) 6773 else: 6774 haystack = seq_get(args, 1) 6775 needle = seq_get(args, 0) 6776 6777 return self.expression( 6778 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6779 ) 6780 6781 def _parse_predict(self) -> exp.Predict: 6782 self._match_text_seq("MODEL") 6783 this = self._parse_table() 6784 6785 self._match(TokenType.COMMA) 6786 self._match_text_seq("TABLE") 6787 6788 return self.expression( 6789 exp.Predict, 6790 this=this, 6791 expression=self._parse_table(), 6792 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6793 ) 6794 6795 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6796 args = self._parse_csv(self._parse_table) 6797 return exp.JoinHint(this=func_name.upper(), expressions=args) 6798 6799 def _parse_substring(self) -> exp.Substring: 6800 # Postgres supports the form: substring(string [from int] [for int]) 6801 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6802 6803 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6804 6805 if self._match(TokenType.FROM): 6806 args.append(self._parse_bitwise()) 6807 if self._match(TokenType.FOR): 6808 if len(args) == 1: 6809 args.append(exp.Literal.number(1)) 6810 args.append(self._parse_bitwise()) 6811 6812 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6813 6814 def _parse_trim(self) -> exp.Trim: 6815 # https://www.w3resource.com/sql/character-functions/trim.php 6816 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6817 6818 position = None 6819 collation = None 6820 expression = None 6821 6822 if self._match_texts(self.TRIM_TYPES): 6823 position = self._prev.text.upper() 6824 6825 this = self._parse_bitwise() 6826 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6827 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6828 expression = self._parse_bitwise() 6829 6830 if invert_order: 6831 this, expression = expression, this 6832 6833 if self._match(TokenType.COLLATE): 6834 collation = self._parse_bitwise() 6835 6836 return self.expression( 6837 exp.Trim, this=this, position=position, expression=expression, collation=collation 6838 ) 6839 6840 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6841 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6842 6843 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6844 return self._parse_window(self._parse_id_var(), alias=True) 6845 6846 def _parse_respect_or_ignore_nulls( 6847 self, this: t.Optional[exp.Expression] 6848 ) -> t.Optional[exp.Expression]: 6849 if self._match_text_seq("IGNORE", "NULLS"): 
6850 return self.expression(exp.IgnoreNulls, this=this) 6851 if self._match_text_seq("RESPECT", "NULLS"): 6852 return self.expression(exp.RespectNulls, this=this) 6853 return this 6854 6855 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6856 if self._match(TokenType.HAVING): 6857 self._match_texts(("MAX", "MIN")) 6858 max = self._prev.text.upper() != "MIN" 6859 return self.expression( 6860 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6861 ) 6862 6863 return this 6864 6865 def _parse_window( 6866 self, this: t.Optional[exp.Expression], alias: bool = False 6867 ) -> t.Optional[exp.Expression]: 6868 func = this 6869 comments = func.comments if isinstance(func, exp.Expression) else None 6870 6871 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6872 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6873 if self._match_text_seq("WITHIN", "GROUP"): 6874 order = self._parse_wrapped(self._parse_order) 6875 this = self.expression(exp.WithinGroup, this=this, expression=order) 6876 6877 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6878 self._match(TokenType.WHERE) 6879 this = self.expression( 6880 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6881 ) 6882 self._match_r_paren() 6883 6884 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6885 # Some dialects choose to implement and some do not. 6886 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6887 6888 # There is some code above in _parse_lambda that handles 6889 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6890 6891 # The below changes handle 6892 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6893 6894 # Oracle allows both formats 6895 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6896 # and Snowflake chose to do the same for familiarity 6897 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6898 if isinstance(this, exp.AggFunc): 6899 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6900 6901 if ignore_respect and ignore_respect is not this: 6902 ignore_respect.replace(ignore_respect.this) 6903 this = self.expression(ignore_respect.__class__, this=this) 6904 6905 this = self._parse_respect_or_ignore_nulls(this) 6906 6907 # bigquery select from window x AS (partition by ...) 
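        # Example (illustrative): both placements normalize to the same tree, so a
        # dialect that only accepts the in-parens form can still be targeted; the
        # output string below is indicative, not pinned to a version:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile(
        #     ...     "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t",
        #     ...     read="snowflake",
        #     ...     write="bigquery",
        #     ... )
        #     ['SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t']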
6908 if alias: 6909 over = None 6910 self._match(TokenType.ALIAS) 6911 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6912 return this 6913 else: 6914 over = self._prev.text.upper() 6915 6916 if comments and isinstance(func, exp.Expression): 6917 func.pop_comments() 6918 6919 if not self._match(TokenType.L_PAREN): 6920 return self.expression( 6921 exp.Window, 6922 comments=comments, 6923 this=this, 6924 alias=self._parse_id_var(False), 6925 over=over, 6926 ) 6927 6928 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6929 6930 first = self._match(TokenType.FIRST) 6931 if self._match_text_seq("LAST"): 6932 first = False 6933 6934 partition, order = self._parse_partition_and_order() 6935 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6936 6937 if kind: 6938 self._match(TokenType.BETWEEN) 6939 start = self._parse_window_spec() 6940 self._match(TokenType.AND) 6941 end = self._parse_window_spec() 6942 exclude = ( 6943 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6944 if self._match_text_seq("EXCLUDE") 6945 else None 6946 ) 6947 6948 spec = self.expression( 6949 exp.WindowSpec, 6950 kind=kind, 6951 start=start["value"], 6952 start_side=start["side"], 6953 end=end["value"], 6954 end_side=end["side"], 6955 exclude=exclude, 6956 ) 6957 else: 6958 spec = None 6959 6960 self._match_r_paren() 6961 6962 window = self.expression( 6963 exp.Window, 6964 comments=comments, 6965 this=this, 6966 partition_by=partition, 6967 order=order, 6968 spec=spec, 6969 alias=window_alias, 6970 over=over, 6971 first=first, 6972 ) 6973 6974 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6975 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6976 return self._parse_window(window, alias=alias) 6977 6978 return window 6979 6980 def _parse_partition_and_order( 6981 self, 6982 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6983 return self._parse_partition_by(), self._parse_order() 6984 6985 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6986 self._match(TokenType.BETWEEN) 6987 6988 return { 6989 "value": ( 6990 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6991 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6992 or self._parse_bitwise() 6993 ), 6994 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6995 } 6996 6997 def _parse_alias( 6998 self, this: t.Optional[exp.Expression], explicit: bool = False 6999 ) -> t.Optional[exp.Expression]: 7000 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7001 # so this section tries to parse the clause version and if it fails, it treats the token 7002 # as an identifier (alias) 7003 if self._can_parse_limit_or_offset(): 7004 return this 7005 7006 any_token = self._match(TokenType.ALIAS) 7007 comments = self._prev_comments or [] 7008 7009 if explicit and not any_token: 7010 return this 7011 7012 if self._match(TokenType.L_PAREN): 7013 aliases = self.expression( 7014 exp.Aliases, 7015 comments=comments, 7016 this=this, 7017 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7018 ) 7019 self._match_r_paren(aliases) 7020 return aliases 7021 7022 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7023 self.STRING_ALIASES and self._parse_string_as_identifier() 7024 ) 7025 7026 if alias: 7027 comments.extend(alias.pop_comments()) 7028 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 7029 column = this.this 7030 7031 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7032 if not this.comments and column and column.comments: 7033 this.comments = column.pop_comments() 7034 7035 return this 7036 7037 def _parse_id_var( 7038 self, 7039 any_token: bool = True, 7040 tokens: t.Optional[t.Collection[TokenType]] = None, 7041 ) -> t.Optional[exp.Expression]: 7042 expression = self._parse_identifier() 7043 if not expression and ( 7044 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7045 ): 7046 quoted = self._prev.token_type == TokenType.STRING 7047 expression = self._identifier_expression(quoted=quoted) 7048 7049 return expression 7050 7051 def _parse_string(self) -> t.Optional[exp.Expression]: 7052 if self._match_set(self.STRING_PARSERS): 7053 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7054 return self._parse_placeholder() 7055 7056 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7057 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7058 if output: 7059 output.update_positions(self._prev) 7060 return output 7061 7062 def _parse_number(self) -> t.Optional[exp.Expression]: 7063 if self._match_set(self.NUMERIC_PARSERS): 7064 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7065 return self._parse_placeholder() 7066 7067 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7068 if self._match(TokenType.IDENTIFIER): 7069 return self._identifier_expression(quoted=True) 7070 return self._parse_placeholder() 7071 7072 def _parse_var( 7073 self, 7074 any_token: bool = False, 7075 tokens: t.Optional[t.Collection[TokenType]] = None, 7076 upper: bool = False, 7077 ) -> t.Optional[exp.Expression]: 7078 if ( 7079 (any_token and self._advance_any()) 7080 or self._match(TokenType.VAR) 7081 or (self._match_set(tokens) if tokens else False) 7082 ): 7083 return self.expression( 7084 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7085 ) 7086 return self._parse_placeholder() 7087 7088 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7089 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7090 self._advance() 7091 return self._prev 7092 return None 7093 7094 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7095 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7096 7097 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7098 return self._parse_primary() or self._parse_var(any_token=True) 7099 7100 def _parse_null(self) -> t.Optional[exp.Expression]: 7101 if self._match_set(self.NULL_TOKENS): 7102 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7103 return self._parse_placeholder() 7104 7105 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7106 if self._match(TokenType.TRUE): 7107 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7108 if self._match(TokenType.FALSE): 7109 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7110 return self._parse_placeholder() 7111 7112 def _parse_star(self) -> t.Optional[exp.Expression]: 7113 if self._match(TokenType.STAR): 7114 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7115 return self._parse_placeholder() 7116 7117 def _parse_parameter(self) -> exp.Parameter: 7118 this = self._parse_identifier() or self._parse_primary_or_var() 7119 return 
self.expression(exp.Parameter, this=this) 7120 7121 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7122 if self._match_set(self.PLACEHOLDER_PARSERS): 7123 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7124 if placeholder: 7125 return placeholder 7126 self._advance(-1) 7127 return None 7128 7129 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7130 if not self._match_texts(keywords): 7131 return None 7132 if self._match(TokenType.L_PAREN, advance=False): 7133 return self._parse_wrapped_csv(self._parse_expression) 7134 7135 expression = self._parse_expression() 7136 return [expression] if expression else None 7137 7138 def _parse_csv( 7139 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7140 ) -> t.List[exp.Expression]: 7141 parse_result = parse_method() 7142 items = [parse_result] if parse_result is not None else [] 7143 7144 while self._match(sep): 7145 self._add_comments(parse_result) 7146 parse_result = parse_method() 7147 if parse_result is not None: 7148 items.append(parse_result) 7149 7150 return items 7151 7152 def _parse_tokens( 7153 self, parse_method: t.Callable, expressions: t.Dict 7154 ) -> t.Optional[exp.Expression]: 7155 this = parse_method() 7156 7157 while self._match_set(expressions): 7158 this = self.expression( 7159 expressions[self._prev.token_type], 7160 this=this, 7161 comments=self._prev_comments, 7162 expression=parse_method(), 7163 ) 7164 7165 return this 7166 7167 def _parse_pipe_syntax_query(self, query: exp.Select) -> exp.Query: 7168 while self._match(TokenType.PIPE_GT): 7169 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 7170 if not parser: 7171 self.raise_error(f"Unsupported pipe syntax operator: '{self._curr.text.upper()}'.") 7172 else: 7173 query = parser(self, query) 7174 7175 return query 7176 7177 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7178 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7179 7180 def _parse_wrapped_csv( 7181 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7182 ) -> t.List[exp.Expression]: 7183 return self._parse_wrapped( 7184 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7185 ) 7186 7187 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7188 wrapped = self._match(TokenType.L_PAREN) 7189 if not wrapped and not optional: 7190 self.raise_error("Expecting (") 7191 parse_result = parse_method() 7192 if wrapped: 7193 self._match_r_paren() 7194 return parse_result 7195 7196 def _parse_expressions(self) -> t.List[exp.Expression]: 7197 return self._parse_csv(self._parse_expression) 7198 7199 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7200 return self._parse_select() or self._parse_set_operations( 7201 self._parse_alias(self._parse_assignment(), explicit=True) 7202 if alias 7203 else self._parse_assignment() 7204 ) 7205 7206 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7207 return self._parse_query_modifiers( 7208 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7209 ) 7210 7211 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7212 this = None 7213 if self._match_texts(self.TRANSACTION_KIND): 7214 this = self._prev.text 7215 7216 self._match_texts(("TRANSACTION", "WORK")) 7217 7218 modes = [] 7219 while True: 7220 mode = [] 7221 while self._match(TokenType.VAR): 7222 
mode.append(self._prev.text) 7223 7224 if mode: 7225 modes.append(" ".join(mode)) 7226 if not self._match(TokenType.COMMA): 7227 break 7228 7229 return self.expression(exp.Transaction, this=this, modes=modes) 7230 7231 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7232 chain = None 7233 savepoint = None 7234 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7235 7236 self._match_texts(("TRANSACTION", "WORK")) 7237 7238 if self._match_text_seq("TO"): 7239 self._match_text_seq("SAVEPOINT") 7240 savepoint = self._parse_id_var() 7241 7242 if self._match(TokenType.AND): 7243 chain = not self._match_text_seq("NO") 7244 self._match_text_seq("CHAIN") 7245 7246 if is_rollback: 7247 return self.expression(exp.Rollback, savepoint=savepoint) 7248 7249 return self.expression(exp.Commit, chain=chain) 7250 7251 def _parse_refresh(self) -> exp.Refresh: 7252 self._match(TokenType.TABLE) 7253 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7254 7255 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7256 if not self._prev.text.upper() == "ADD": 7257 return None 7258 7259 self._match(TokenType.COLUMN) 7260 exists_column = self._parse_exists(not_=True) 7261 expression = self._parse_field_def() 7262 7263 if expression: 7264 expression.set("exists", exists_column) 7265 7266 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7267 if self._match_texts(("FIRST", "AFTER")): 7268 position = self._prev.text 7269 column_position = self.expression( 7270 exp.ColumnPosition, this=self._parse_column(), position=position 7271 ) 7272 expression.set("position", column_position) 7273 7274 return expression 7275 7276 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7277 drop = self._match(TokenType.DROP) and self._parse_drop() 7278 if drop and not isinstance(drop, exp.Command): 7279 drop.set("kind", drop.args.get("kind", "COLUMN")) 7280 return drop 7281 7282 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7283 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7284 return self.expression( 7285 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7286 ) 7287 7288 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7289 def _parse_add_column_or_constraint(): 7290 self._match_text_seq("ADD") 7291 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7292 return self.expression( 7293 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7294 ) 7295 return self._parse_add_column() 7296 7297 if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( 7298 "COLUMNS" 7299 ): 7300 schema = self._parse_schema() 7301 7302 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7303 7304 return self._parse_csv(_parse_add_column_or_constraint) 7305 7306 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7307 if self._match_texts(self.ALTER_ALTER_PARSERS): 7308 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7309 7310 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7311 # keyword after ALTER we default to parsing this statement 7312 self._match(TokenType.COLUMN) 7313 column = self._parse_field(any_token=True) 7314 7315 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7316 return self.expression(exp.AlterColumn, this=column, drop=True) 7317 if 
self._match_pair(TokenType.SET, TokenType.DEFAULT): 7318 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7319 if self._match(TokenType.COMMENT): 7320 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7321 if self._match_text_seq("DROP", "NOT", "NULL"): 7322 return self.expression( 7323 exp.AlterColumn, 7324 this=column, 7325 drop=True, 7326 allow_null=True, 7327 ) 7328 if self._match_text_seq("SET", "NOT", "NULL"): 7329 return self.expression( 7330 exp.AlterColumn, 7331 this=column, 7332 allow_null=False, 7333 ) 7334 7335 if self._match_text_seq("SET", "VISIBLE"): 7336 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7337 if self._match_text_seq("SET", "INVISIBLE"): 7338 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7339 7340 self._match_text_seq("SET", "DATA") 7341 self._match_text_seq("TYPE") 7342 return self.expression( 7343 exp.AlterColumn, 7344 this=column, 7345 dtype=self._parse_types(), 7346 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7347 using=self._match(TokenType.USING) and self._parse_assignment(), 7348 ) 7349 7350 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7351 if self._match_texts(("ALL", "EVEN", "AUTO")): 7352 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7353 7354 self._match_text_seq("KEY", "DISTKEY") 7355 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7356 7357 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7358 if compound: 7359 self._match_text_seq("SORTKEY") 7360 7361 if self._match(TokenType.L_PAREN, advance=False): 7362 return self.expression( 7363 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7364 ) 7365 7366 self._match_texts(("AUTO", "NONE")) 7367 return self.expression( 7368 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7369 ) 7370 7371 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7372 index = self._index - 1 7373 7374 partition_exists = self._parse_exists() 7375 if self._match(TokenType.PARTITION, advance=False): 7376 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7377 7378 self._retreat(index) 7379 return self._parse_csv(self._parse_drop_column) 7380 7381 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7382 if self._match(TokenType.COLUMN): 7383 exists = self._parse_exists() 7384 old_column = self._parse_column() 7385 to = self._match_text_seq("TO") 7386 new_column = self._parse_column() 7387 7388 if old_column is None or to is None or new_column is None: 7389 return None 7390 7391 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7392 7393 self._match_text_seq("TO") 7394 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7395 7396 def _parse_alter_table_set(self) -> exp.AlterSet: 7397 alter_set = self.expression(exp.AlterSet) 7398 7399 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7400 "TABLE", "PROPERTIES" 7401 ): 7402 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7403 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7404 alter_set.set("expressions", [self._parse_assignment()]) 7405 elif self._match_texts(("LOGGED", "UNLOGGED")): 7406 alter_set.set("option", exp.var(self._prev.text.upper())) 7407 elif 
self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7408 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7409 elif self._match_text_seq("LOCATION"): 7410 alter_set.set("location", self._parse_field()) 7411 elif self._match_text_seq("ACCESS", "METHOD"): 7412 alter_set.set("access_method", self._parse_field()) 7413 elif self._match_text_seq("TABLESPACE"): 7414 alter_set.set("tablespace", self._parse_field()) 7415 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7416 alter_set.set("file_format", [self._parse_field()]) 7417 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7418 alter_set.set("file_format", self._parse_wrapped_options()) 7419 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7420 alter_set.set("copy_options", self._parse_wrapped_options()) 7421 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7422 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7423 else: 7424 if self._match_text_seq("SERDE"): 7425 alter_set.set("serde", self._parse_field()) 7426 7427 properties = self._parse_wrapped(self._parse_properties, optional=True) 7428 alter_set.set("expressions", [properties]) 7429 7430 return alter_set 7431 7432 def _parse_alter(self) -> exp.Alter | exp.Command: 7433 start = self._prev 7434 7435 alter_token = self._match_set(self.ALTERABLES) and self._prev 7436 if not alter_token: 7437 return self._parse_as_command(start) 7438 7439 exists = self._parse_exists() 7440 only = self._match_text_seq("ONLY") 7441 this = self._parse_table(schema=True) 7442 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7443 7444 if self._next: 7445 self._advance() 7446 7447 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7448 if parser: 7449 actions = ensure_list(parser(self)) 7450 not_valid = self._match_text_seq("NOT", "VALID") 7451 options = self._parse_csv(self._parse_property) 7452 7453 if not self._curr and actions: 7454 return self.expression( 7455 exp.Alter, 7456 this=this, 7457 kind=alter_token.text.upper(), 7458 exists=exists, 7459 actions=actions, 7460 only=only, 7461 options=options, 7462 cluster=cluster, 7463 not_valid=not_valid, 7464 ) 7465 7466 return self._parse_as_command(start) 7467 7468 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7469 start = self._prev 7470 # https://duckdb.org/docs/sql/statements/analyze 7471 if not self._curr: 7472 return self.expression(exp.Analyze) 7473 7474 options = [] 7475 while self._match_texts(self.ANALYZE_STYLES): 7476 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7477 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7478 else: 7479 options.append(self._prev.text.upper()) 7480 7481 this: t.Optional[exp.Expression] = None 7482 inner_expression: t.Optional[exp.Expression] = None 7483 7484 kind = self._curr and self._curr.text.upper() 7485 7486 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7487 this = self._parse_table_parts() 7488 elif self._match_text_seq("TABLES"): 7489 if self._match_set((TokenType.FROM, TokenType.IN)): 7490 kind = f"{kind} {self._prev.text.upper()}" 7491 this = self._parse_table(schema=True, is_db_reference=True) 7492 elif self._match_text_seq("DATABASE"): 7493 this = self._parse_table(schema=True, is_db_reference=True) 7494 elif self._match_text_seq("CLUSTER"): 7495 this = self._parse_table() 7496 # Try matching inner expr keywords before fallback to parse table. 
7497 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7498 kind = None 7499 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7500 else: 7501 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7502 kind = None 7503 this = self._parse_table_parts() 7504 7505 partition = self._try_parse(self._parse_partition) 7506 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7507 return self._parse_as_command(start) 7508 7509 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7510 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7511 "WITH", "ASYNC", "MODE" 7512 ): 7513 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7514 else: 7515 mode = None 7516 7517 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7518 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7519 7520 properties = self._parse_properties() 7521 return self.expression( 7522 exp.Analyze, 7523 kind=kind, 7524 this=this, 7525 mode=mode, 7526 partition=partition, 7527 properties=properties, 7528 expression=inner_expression, 7529 options=options, 7530 ) 7531 7532 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7533 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7534 this = None 7535 kind = self._prev.text.upper() 7536 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7537 expressions = [] 7538 7539 if not self._match_text_seq("STATISTICS"): 7540 self.raise_error("Expecting token STATISTICS") 7541 7542 if self._match_text_seq("NOSCAN"): 7543 this = "NOSCAN" 7544 elif self._match(TokenType.FOR): 7545 if self._match_text_seq("ALL", "COLUMNS"): 7546 this = "FOR ALL COLUMNS" 7547 if self._match_texts("COLUMNS"): 7548 this = "FOR COLUMNS" 7549 expressions = self._parse_csv(self._parse_column_reference) 7550 elif self._match_text_seq("SAMPLE"): 7551 sample = self._parse_number() 7552 expressions = [ 7553 self.expression( 7554 exp.AnalyzeSample, 7555 sample=sample, 7556 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7557 ) 7558 ] 7559 7560 return self.expression( 7561 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7562 ) 7563 7564 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7565 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7566 kind = None 7567 this = None 7568 expression: t.Optional[exp.Expression] = None 7569 if self._match_text_seq("REF", "UPDATE"): 7570 kind = "REF" 7571 this = "UPDATE" 7572 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7573 this = "UPDATE SET DANGLING TO NULL" 7574 elif self._match_text_seq("STRUCTURE"): 7575 kind = "STRUCTURE" 7576 if self._match_text_seq("CASCADE", "FAST"): 7577 this = "CASCADE FAST" 7578 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7579 ("ONLINE", "OFFLINE") 7580 ): 7581 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7582 expression = self._parse_into() 7583 7584 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7585 7586 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7587 this = self._prev.text.upper() 7588 if self._match_text_seq("COLUMNS"): 7589 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7590 return None 7591 7592 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7593 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7594 if self._match_text_seq("STATISTICS"): 7595 return self.expression(exp.AnalyzeDelete, kind=kind) 7596 return None 7597 7598 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7599 if self._match_text_seq("CHAINED", "ROWS"): 7600 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7601 return None 7602 7603 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7604 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7605 this = self._prev.text.upper() 7606 expression: t.Optional[exp.Expression] = None 7607 expressions = [] 7608 update_options = None 7609 7610 if self._match_text_seq("HISTOGRAM", "ON"): 7611 expressions = self._parse_csv(self._parse_column_reference) 7612 with_expressions = [] 7613 while self._match(TokenType.WITH): 7614 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7615 if self._match_texts(("SYNC", "ASYNC")): 7616 if self._match_text_seq("MODE", advance=False): 7617 with_expressions.append(f"{self._prev.text.upper()} MODE") 7618 self._advance() 7619 else: 7620 buckets = self._parse_number() 7621 if self._match_text_seq("BUCKETS"): 7622 with_expressions.append(f"{buckets} BUCKETS") 7623 if with_expressions: 7624 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7625 7626 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7627 TokenType.UPDATE, advance=False 7628 ): 7629 update_options = self._prev.text.upper() 7630 self._advance() 7631 elif self._match_text_seq("USING", "DATA"): 7632 expression = self.expression(exp.UsingData, this=self._parse_string()) 7633 7634 return self.expression( 7635 exp.AnalyzeHistogram, 7636 this=this, 7637 expressions=expressions, 7638 expression=expression, 7639 update_options=update_options, 7640 ) 7641 7642 def _parse_merge(self) -> exp.Merge: 7643 self._match(TokenType.INTO) 7644 target = self._parse_table() 7645 7646 if target and self._match(TokenType.ALIAS, advance=False): 7647 target.set("alias", self._parse_table_alias()) 7648 7649 self._match(TokenType.USING) 7650 using = self._parse_table() 7651 7652 self._match(TokenType.ON) 7653 on = self._parse_assignment() 7654 7655 return self.expression( 7656 exp.Merge, 7657 this=target, 7658 using=using, 7659 on=on, 7660 whens=self._parse_when_matched(), 7661 returning=self._parse_returning(), 7662 ) 7663 7664 def _parse_when_matched(self) -> exp.Whens: 7665 whens = [] 7666 7667 while self._match(TokenType.WHEN): 7668 matched = not self._match(TokenType.NOT) 7669 self._match_text_seq("MATCHED") 7670 source = ( 7671 False 7672 if self._match_text_seq("BY", "TARGET") 7673 else self._match_text_seq("BY", "SOURCE") 7674 ) 7675 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7676 7677 self._match(TokenType.THEN) 7678 7679 if self._match(TokenType.INSERT): 7680 this = self._parse_star() 7681 if this: 7682 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7683 else: 7684 then = self.expression( 7685 exp.Insert, 7686 this=exp.var("ROW") 7687 if self._match_text_seq("ROW") 7688 else self._parse_value(values=False), 7689 expression=self._match_text_seq("VALUES") and self._parse_value(), 7690 ) 7691 elif self._match(TokenType.UPDATE): 7692 expressions = self._parse_star() 7693 if expressions: 7694 then = self.expression(exp.Update, expressions=expressions) 7695 else: 7696 then = self.expression( 7697 exp.Update, 7698 
expressions=self._match(TokenType.SET) 7699 and self._parse_csv(self._parse_equality), 7700 ) 7701 elif self._match(TokenType.DELETE): 7702 then = self.expression(exp.Var, this=self._prev.text) 7703 else: 7704 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7705 7706 whens.append( 7707 self.expression( 7708 exp.When, 7709 matched=matched, 7710 source=source, 7711 condition=condition, 7712 then=then, 7713 ) 7714 ) 7715 return self.expression(exp.Whens, expressions=whens) 7716 7717 def _parse_show(self) -> t.Optional[exp.Expression]: 7718 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7719 if parser: 7720 return parser(self) 7721 return self._parse_as_command(self._prev) 7722 7723 def _parse_set_item_assignment( 7724 self, kind: t.Optional[str] = None 7725 ) -> t.Optional[exp.Expression]: 7726 index = self._index 7727 7728 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7729 return self._parse_set_transaction(global_=kind == "GLOBAL") 7730 7731 left = self._parse_primary() or self._parse_column() 7732 assignment_delimiter = self._match_texts(("=", "TO")) 7733 7734 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7735 self._retreat(index) 7736 return None 7737 7738 right = self._parse_statement() or self._parse_id_var() 7739 if isinstance(right, (exp.Column, exp.Identifier)): 7740 right = exp.var(right.name) 7741 7742 this = self.expression(exp.EQ, this=left, expression=right) 7743 return self.expression(exp.SetItem, this=this, kind=kind) 7744 7745 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7746 self._match_text_seq("TRANSACTION") 7747 characteristics = self._parse_csv( 7748 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7749 ) 7750 return self.expression( 7751 exp.SetItem, 7752 expressions=characteristics, 7753 kind="TRANSACTION", 7754 **{"global": global_}, # type: ignore 7755 ) 7756 7757 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7758 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7759 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7760 7761 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7762 index = self._index 7763 set_ = self.expression( 7764 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7765 ) 7766 7767 if self._curr: 7768 self._retreat(index) 7769 return self._parse_as_command(self._prev) 7770 7771 return set_ 7772 7773 def _parse_var_from_options( 7774 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7775 ) -> t.Optional[exp.Var]: 7776 start = self._curr 7777 if not start: 7778 return None 7779 7780 option = start.text.upper() 7781 continuations = options.get(option) 7782 7783 index = self._index 7784 self._advance() 7785 for keywords in continuations or []: 7786 if isinstance(keywords, str): 7787 keywords = (keywords,) 7788 7789 if self._match_text_seq(*keywords): 7790 option = f"{option} {' '.join(keywords)}" 7791 break 7792 else: 7793 if continuations or continuations is None: 7794 if raise_unmatched: 7795 self.raise_error(f"Unknown option {option}") 7796 7797 self._retreat(index) 7798 return None 7799 7800 return exp.var(option) 7801 7802 def _parse_as_command(self, start: Token) -> exp.Command: 7803 while self._curr: 7804 self._advance() 7805 text = self._find_sql(start, self._prev) 7806 size = len(start.text) 7807 self._warn_unsupported() 7808 return exp.Command(this=text[:size], 
expression=text[size:]) 7809 7810 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7811 settings = [] 7812 7813 self._match_l_paren() 7814 kind = self._parse_id_var() 7815 7816 if self._match(TokenType.L_PAREN): 7817 while True: 7818 key = self._parse_id_var() 7819 value = self._parse_primary() 7820 if not key and value is None: 7821 break 7822 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7823 self._match(TokenType.R_PAREN) 7824 7825 self._match_r_paren() 7826 7827 return self.expression( 7828 exp.DictProperty, 7829 this=this, 7830 kind=kind.this if kind else None, 7831 settings=settings, 7832 ) 7833 7834 def _parse_dict_range(self, this: str) -> exp.DictRange: 7835 self._match_l_paren() 7836 has_min = self._match_text_seq("MIN") 7837 if has_min: 7838 min = self._parse_var() or self._parse_primary() 7839 self._match_text_seq("MAX") 7840 max = self._parse_var() or self._parse_primary() 7841 else: 7842 max = self._parse_var() or self._parse_primary() 7843 min = exp.Literal.number(0) 7844 self._match_r_paren() 7845 return self.expression(exp.DictRange, this=this, min=min, max=max) 7846 7847 def _parse_comprehension( 7848 self, this: t.Optional[exp.Expression] 7849 ) -> t.Optional[exp.Comprehension]: 7850 index = self._index 7851 expression = self._parse_column() 7852 if not self._match(TokenType.IN): 7853 self._retreat(index - 1) 7854 return None 7855 iterator = self._parse_column() 7856 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7857 return self.expression( 7858 exp.Comprehension, 7859 this=this, 7860 expression=expression, 7861 iterator=iterator, 7862 condition=condition, 7863 ) 7864 7865 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7866 if self._match(TokenType.HEREDOC_STRING): 7867 return self.expression(exp.Heredoc, this=self._prev.text) 7868 7869 if not self._match_text_seq("$"): 7870 return None 7871 7872 tags = ["$"] 7873 tag_text = None 7874 7875 if self._is_connected(): 7876 self._advance() 7877 tags.append(self._prev.text.upper()) 7878 else: 7879 self.raise_error("No closing $ found") 7880 7881 if tags[-1] != "$": 7882 if self._is_connected() and self._match_text_seq("$"): 7883 tag_text = tags[-1] 7884 tags.append("$") 7885 else: 7886 self.raise_error("No closing $ found") 7887 7888 heredoc_start = self._curr 7889 7890 while self._curr: 7891 if self._match_text_seq(*tags, advance=False): 7892 this = self._find_sql(heredoc_start, self._prev) 7893 self._advance(len(tags)) 7894 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7895 7896 self._advance() 7897 7898 self.raise_error(f"No closing {''.join(tags)} found") 7899 return None 7900 7901 def _find_parser( 7902 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7903 ) -> t.Optional[t.Callable]: 7904 if not self._curr: 7905 return None 7906 7907 index = self._index 7908 this = [] 7909 while True: 7910 # The current token might be multiple words 7911 curr = self._curr.text.upper() 7912 key = curr.split(" ") 7913 this.append(curr) 7914 7915 self._advance() 7916 result, trie = in_trie(trie, key) 7917 if result == TrieResult.FAILED: 7918 break 7919 7920 if result == TrieResult.EXISTS: 7921 subparser = parsers[" ".join(this)] 7922 return subparser 7923 7924 self._retreat(index) 7925 return None 7926 7927 def _match(self, token_type, advance=True, expression=None): 7928 if not self._curr: 7929 return None 7930 7931 if self._curr.token_type == token_type: 7932 if advance: 7933 self._advance() 7934 self._add_comments(expression) 7935 return 
True 7936 7937 return None 7938 7939 def _match_set(self, types, advance=True): 7940 if not self._curr: 7941 return None 7942 7943 if self._curr.token_type in types: 7944 if advance: 7945 self._advance() 7946 return True 7947 7948 return None 7949 7950 def _match_pair(self, token_type_a, token_type_b, advance=True): 7951 if not self._curr or not self._next: 7952 return None 7953 7954 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7955 if advance: 7956 self._advance(2) 7957 return True 7958 7959 return None 7960 7961 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7962 if not self._match(TokenType.L_PAREN, expression=expression): 7963 self.raise_error("Expecting (") 7964 7965 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7966 if not self._match(TokenType.R_PAREN, expression=expression): 7967 self.raise_error("Expecting )") 7968 7969 def _match_texts(self, texts, advance=True): 7970 if ( 7971 self._curr 7972 and self._curr.token_type != TokenType.STRING 7973 and self._curr.text.upper() in texts 7974 ): 7975 if advance: 7976 self._advance() 7977 return True 7978 return None 7979 7980 def _match_text_seq(self, *texts, advance=True): 7981 index = self._index 7982 for text in texts: 7983 if ( 7984 self._curr 7985 and self._curr.token_type != TokenType.STRING 7986 and self._curr.text.upper() == text 7987 ): 7988 self._advance() 7989 else: 7990 self._retreat(index) 7991 return None 7992 7993 if not advance: 7994 self._retreat(index) 7995 7996 return True 7997 7998 def _replace_lambda( 7999 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8000 ) -> t.Optional[exp.Expression]: 8001 if not node: 8002 return node 8003 8004 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8005 8006 for column in node.find_all(exp.Column): 8007 typ = lambda_types.get(column.parts[0].name) 8008 if typ is not None: 8009 dot_or_id = column.to_dot() if column.table else column.this 8010 8011 if typ: 8012 dot_or_id = self.expression( 8013 exp.Cast, 8014 this=dot_or_id, 8015 to=typ, 8016 ) 8017 8018 parent = column.parent 8019 8020 while isinstance(parent, exp.Dot): 8021 if not isinstance(parent.parent, exp.Dot): 8022 parent.replace(dot_or_id) 8023 break 8024 parent = parent.parent 8025 else: 8026 if column is node: 8027 node = dot_or_id 8028 else: 8029 column.replace(dot_or_id) 8030 return node 8031 8032 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8033 start = self._prev 8034 8035 # Not to be confused with TRUNCATE(number, decimals) function call 8036 if self._match(TokenType.L_PAREN): 8037 self._retreat(self._index - 2) 8038 return self._parse_function() 8039 8040 # Clickhouse supports TRUNCATE DATABASE as well 8041 is_database = self._match(TokenType.DATABASE) 8042 8043 self._match(TokenType.TABLE) 8044 8045 exists = self._parse_exists(not_=False) 8046 8047 expressions = self._parse_csv( 8048 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8049 ) 8050 8051 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8052 8053 if self._match_text_seq("RESTART", "IDENTITY"): 8054 identity = "RESTART" 8055 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8056 identity = "CONTINUE" 8057 else: 8058 identity = None 8059 8060 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8061 option = self._prev.text 8062 else: 8063 option = None 8064 8065 partition = self._parse_partition() 
8066 8067 # Fallback case 8068 if self._curr: 8069 return self._parse_as_command(start) 8070 8071 return self.expression( 8072 exp.TruncateTable, 8073 expressions=expressions, 8074 is_database=is_database, 8075 exists=exists, 8076 cluster=cluster, 8077 identity=identity, 8078 option=option, 8079 partition=partition, 8080 ) 8081 8082 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8083 this = self._parse_ordered(self._parse_opclass) 8084 8085 if not self._match(TokenType.WITH): 8086 return this 8087 8088 op = self._parse_var(any_token=True) 8089 8090 return self.expression(exp.WithOperator, this=this, op=op) 8091 8092 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8093 self._match(TokenType.EQ) 8094 self._match(TokenType.L_PAREN) 8095 8096 opts: t.List[t.Optional[exp.Expression]] = [] 8097 option: exp.Expression | None 8098 while self._curr and not self._match(TokenType.R_PAREN): 8099 if self._match_text_seq("FORMAT_NAME", "="): 8100 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8101 option = self._parse_format_name() 8102 else: 8103 option = self._parse_property() 8104 8105 if option is None: 8106 self.raise_error("Unable to parse option") 8107 break 8108 8109 opts.append(option) 8110 8111 return opts 8112 8113 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8114 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8115 8116 options = [] 8117 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8118 option = self._parse_var(any_token=True) 8119 prev = self._prev.text.upper() 8120 8121 # Different dialects might separate options and values by white space, "=" and "AS" 8122 self._match(TokenType.EQ) 8123 self._match(TokenType.ALIAS) 8124 8125 param = self.expression(exp.CopyParameter, this=option) 8126 8127 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8128 TokenType.L_PAREN, advance=False 8129 ): 8130 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8131 param.set("expressions", self._parse_wrapped_options()) 8132 elif prev == "FILE_FORMAT": 8133 # T-SQL's external file format case 8134 param.set("expression", self._parse_field()) 8135 else: 8136 param.set("expression", self._parse_unquoted_field()) 8137 8138 options.append(param) 8139 self._match(sep) 8140 8141 return options 8142 8143 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8144 expr = self.expression(exp.Credentials) 8145 8146 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8147 expr.set("storage", self._parse_field()) 8148 if self._match_text_seq("CREDENTIALS"): 8149 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8150 creds = ( 8151 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8152 ) 8153 expr.set("credentials", creds) 8154 if self._match_text_seq("ENCRYPTION"): 8155 expr.set("encryption", self._parse_wrapped_options()) 8156 if self._match_text_seq("IAM_ROLE"): 8157 expr.set("iam_role", self._parse_field()) 8158 if self._match_text_seq("REGION"): 8159 expr.set("region", self._parse_field()) 8160 8161 return expr 8162 8163 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8164 return self._parse_field() 8165 8166 def _parse_copy(self) -> exp.Copy | exp.Command: 8167 start = self._prev 8168 8169 self._match(TokenType.INTO) 8170 8171 this = ( 8172 self._parse_select(nested=True, parse_subquery_alias=False) 8173 if self._match(TokenType.L_PAREN, advance=False) 8174 else self._parse_table(schema=True) 
8175 ) 8176 8177 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8178 8179 files = self._parse_csv(self._parse_file_location) 8180 credentials = self._parse_credentials() 8181 8182 self._match_text_seq("WITH") 8183 8184 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8185 8186 # Fallback case 8187 if self._curr: 8188 return self._parse_as_command(start) 8189 8190 return self.expression( 8191 exp.Copy, 8192 this=this, 8193 kind=kind, 8194 credentials=credentials, 8195 files=files, 8196 params=params, 8197 ) 8198 8199 def _parse_normalize(self) -> exp.Normalize: 8200 return self.expression( 8201 exp.Normalize, 8202 this=self._parse_bitwise(), 8203 form=self._match(TokenType.COMMA) and self._parse_var(), 8204 ) 8205 8206 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8207 args = self._parse_csv(lambda: self._parse_lambda()) 8208 8209 this = seq_get(args, 0) 8210 decimals = seq_get(args, 1) 8211 8212 return expr_type( 8213 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8214 ) 8215 8216 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8217 star_token = self._prev 8218 8219 if self._match_text_seq("COLUMNS", "(", advance=False): 8220 this = self._parse_function() 8221 if isinstance(this, exp.Columns): 8222 this.set("unpack", True) 8223 return this 8224 8225 return self.expression( 8226 exp.Star, 8227 **{ # type: ignore 8228 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8229 "replace": self._parse_star_op("REPLACE"), 8230 "rename": self._parse_star_op("RENAME"), 8231 }, 8232 ).update_positions(star_token) 8233 8234 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8235 privilege_parts = [] 8236 8237 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8238 # (end of privilege list) or L_PAREN (start of column list) are met 8239 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8240 privilege_parts.append(self._curr.text.upper()) 8241 self._advance() 8242 8243 this = exp.var(" ".join(privilege_parts)) 8244 expressions = ( 8245 self._parse_wrapped_csv(self._parse_column) 8246 if self._match(TokenType.L_PAREN, advance=False) 8247 else None 8248 ) 8249 8250 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8251 8252 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8253 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8254 principal = self._parse_id_var() 8255 8256 if not principal: 8257 return None 8258 8259 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8260 8261 def _parse_grant(self) -> exp.Grant | exp.Command: 8262 start = self._prev 8263 8264 privileges = self._parse_csv(self._parse_grant_privilege) 8265 8266 self._match(TokenType.ON) 8267 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8268 8269 # Attempt to parse the securable e.g. 
MySQL allows names 8270 # such as "foo.*", "*.*" which are not easily parseable yet 8271 securable = self._try_parse(self._parse_table_parts) 8272 8273 if not securable or not self._match_text_seq("TO"): 8274 return self._parse_as_command(start) 8275 8276 principals = self._parse_csv(self._parse_grant_principal) 8277 8278 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8279 8280 if self._curr: 8281 return self._parse_as_command(start) 8282 8283 return self.expression( 8284 exp.Grant, 8285 privileges=privileges, 8286 kind=kind, 8287 securable=securable, 8288 principals=principals, 8289 grant_option=grant_option, 8290 ) 8291 8292 def _parse_overlay(self) -> exp.Overlay: 8293 return self.expression( 8294 exp.Overlay, 8295 **{ # type: ignore 8296 "this": self._parse_bitwise(), 8297 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8298 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8299 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8300 }, 8301 ) 8302 8303 def _parse_format_name(self) -> exp.Property: 8304 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8305 # for FILE_FORMAT = <format_name> 8306 return self.expression( 8307 exp.Property, 8308 this=exp.var("FORMAT_NAME"), 8309 value=self._parse_string() or self._parse_table_parts(), 8310 ) 8311 8312 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8313 args: t.List[exp.Expression] = [] 8314 8315 if self._match(TokenType.DISTINCT): 8316 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8317 self._match(TokenType.COMMA) 8318 8319 args.extend(self._parse_csv(self._parse_assignment)) 8320 8321 return self.expression( 8322 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8323 ) 8324 8325 def _identifier_expression( 8326 self, token: t.Optional[Token] = None, **kwargs: t.Any 8327 ) -> exp.Identifier: 8328 token = token or self._prev 8329 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8330 expression.update_positions(token) 8331 return expression
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1546 def __init__( 1547 self, 1548 error_level: t.Optional[ErrorLevel] = None, 1549 error_message_context: int = 100, 1550 max_errors: int = 3, 1551 dialect: DialectType = None, 1552 ): 1553 from sqlglot.dialects import Dialect 1554 1555 self.error_level = error_level or ErrorLevel.IMMEDIATE 1556 self.error_message_context = error_message_context 1557 self.max_errors = max_errors 1558 self.dialect = Dialect.get_or_raise(dialect) 1559 self.reset()
1571 def parse( 1572 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1573 ) -> t.List[t.Optional[exp.Expression]]: 1574 """ 1575 Parses a list of tokens and returns a list of syntax trees, one tree 1576 per parsed SQL statement. 1577 1578 Args: 1579 raw_tokens: The list of tokens. 1580 sql: The original SQL string, used to produce helpful debug messages. 1581 1582 Returns: 1583 The list of the produced syntax trees. 1584 """ 1585 return self._parse( 1586 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1587 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1589 def parse_into( 1590 self, 1591 expression_types: exp.IntoType, 1592 raw_tokens: t.List[Token], 1593 sql: t.Optional[str] = None, 1594 ) -> t.List[t.Optional[exp.Expression]]: 1595 """ 1596 Parses a list of tokens into a given Expression type. If a collection of Expression 1597 types is given instead, this method will try to parse the token list into each one 1598 of them, stopping at the first for which the parsing succeeds. 1599 1600 Args: 1601 expression_types: The expression type(s) to try and parse the token list into. 1602 raw_tokens: The list of tokens. 1603 sql: The original SQL string, used to produce helpful debug messages. 1604 1605 Returns: 1606 The target Expression. 1607 """ 1608 errors = [] 1609 for expression_type in ensure_list(expression_types): 1610 parser = self.EXPRESSION_PARSERS.get(expression_type) 1611 if not parser: 1612 raise TypeError(f"No parser registered for {expression_type}") 1613 1614 try: 1615 return self._parse(parser, raw_tokens, sql) 1616 except ParseError as e: 1617 e.errors[0]["into_expression"] = expression_type 1618 errors.append(e) 1619 1620 raise ParseError( 1621 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1622 errors=merge_errors(errors), 1623 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1663 def check_errors(self) -> None: 1664 """Logs or raises any found errors, depending on the chosen error level setting.""" 1665 if self.error_level == ErrorLevel.WARN: 1666 for error in self.errors: 1667 logger.error(str(error)) 1668 elif self.error_level == ErrorLevel.RAISE and self.errors: 1669 raise ParseError( 1670 concat_messages(self.errors, self.max_errors), 1671 errors=merge_errors(self.errors), 1672 )
Logs or raises any found errors, depending on the chosen error level setting.
1674 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1675 """ 1676 Appends an error in the list of recorded errors or raises it, depending on the chosen 1677 error level setting. 1678 """ 1679 token = token or self._curr or self._prev or Token.string("") 1680 start = token.start 1681 end = token.end + 1 1682 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1683 highlight = self.sql[start:end] 1684 end_context = self.sql[end : end + self.error_message_context] 1685 1686 error = ParseError.new( 1687 f"{message}. Line {token.line}, Col: {token.col}.\n" 1688 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1689 description=message, 1690 line=token.line, 1691 col=token.col, 1692 start_context=start_context, 1693 highlight=highlight, 1694 end_context=end_context, 1695 ) 1696 1697 if self.error_level == ErrorLevel.IMMEDIATE: 1698 raise error 1699 1700 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1702 def expression( 1703 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1704 ) -> E: 1705 """ 1706 Creates a new, validated Expression. 1707 1708 Args: 1709 exp_class: The expression class to instantiate. 1710 comments: An optional list of comments to attach to the expression. 1711 kwargs: The arguments to set for the expression along with their respective values. 1712 1713 Returns: 1714 The target expression. 1715 """ 1716 instance = exp_class(**kwargs) 1717 instance.add_comments(comments) if comments else self._add_comments(instance) 1718 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1725 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1726 """ 1727 Validates an Expression, making sure that all its mandatory arguments are set. 1728 1729 Args: 1730 expression: The expression to validate. 1731 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1732 1733 Returns: 1734 The validated expression. 1735 """ 1736 if self.error_level != ErrorLevel.IGNORE: 1737 for error_message in expression.error_messages(args): 1738 self.raise_error(error_message) 1739 1740 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
4715 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4716 start = self._index 4717 _, side_token, kind_token = self._parse_join_parts() 4718 4719 side = side_token.text if side_token else None 4720 kind = kind_token.text if kind_token else None 4721 4722 if not self._match_set(self.SET_OPERATIONS): 4723 self._retreat(start) 4724 return None 4725 4726 token_type = self._prev.token_type 4727 4728 if token_type == TokenType.UNION: 4729 operation: t.Type[exp.SetOperation] = exp.Union 4730 elif token_type == TokenType.EXCEPT: 4731 operation = exp.Except 4732 else: 4733 operation = exp.Intersect 4734 4735 comments = self._prev.comments 4736 4737 if self._match(TokenType.DISTINCT): 4738 distinct: t.Optional[bool] = True 4739 elif self._match(TokenType.ALL): 4740 distinct = False 4741 else: 4742 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4743 if distinct is None: 4744 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4745 4746 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4747 "STRICT", "CORRESPONDING" 4748 ) 4749 if self._match_text_seq("CORRESPONDING"): 4750 by_name = True 4751 if not side and not kind: 4752 kind = "INNER" 4753 4754 on_column_list = None 4755 if by_name and self._match_texts(("ON", "BY")): 4756 on_column_list = self._parse_wrapped_csv(self._parse_column) 4757 4758 expression = self._parse_select(nested=True, parse_set_operation=False) 4759 4760 return self.expression( 4761 operation, 4762 comments=comments, 4763 this=this, 4764 distinct=distinct, 4765 by_name=by_name, 4766 expression=expression, 4767 side=side, 4768 kind=kind, 4769 on=on_column_list, 4770 )