sqlglot.parser
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6import itertools 7from collections import defaultdict 8 9from sqlglot import exp 10from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 11from sqlglot.helper import apply_index_offset, ensure_list, seq_get 12from sqlglot.time import format_time 13from sqlglot.tokens import Token, Tokenizer, TokenType 14from sqlglot.trie import TrieResult, in_trie, new_trie 15 16if t.TYPE_CHECKING: 17 from sqlglot._typing import E, Lit 18 from sqlglot.dialects.dialect import Dialect, DialectType 19 20 T = t.TypeVar("T") 21 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 22 23logger = logging.getLogger("sqlglot") 24 25OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 26 27# Used to detect alphabetical characters and +/- in timestamp literals 28TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]") 29 30 31def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 32 if len(args) == 1 and args[0].is_star: 33 return exp.StarMap(this=args[0]) 34 35 keys = [] 36 values = [] 37 for i in range(0, len(args), 2): 38 keys.append(args[i]) 39 values.append(args[i + 1]) 40 41 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 42 43 44def build_like(args: t.List) -> exp.Escape | exp.Like: 45 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 46 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 47 48 49def binary_range_parser( 50 expr_type: t.Type[exp.Expression], reverse_args: bool = False 51) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 52 def _parse_binary_range( 53 self: Parser, this: t.Optional[exp.Expression] 54 ) -> t.Optional[exp.Expression]: 55 expression = self._parse_bitwise() 56 if reverse_args: 57 this, expression = expression, this 58 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 59 60 return _parse_binary_range 61 62 63def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 64 # Default argument order is base, expression 65 this = seq_get(args, 0) 66 expression = seq_get(args, 1) 67 68 if expression: 69 if not dialect.LOG_BASE_FIRST: 70 this, expression = expression, this 71 return exp.Log(this=this, expression=expression) 72 73 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 74 75 76def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 79 80 81def build_lower(args: t.List) -> exp.Lower | exp.Hex: 82 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 85 86 87def build_upper(args: t.List) -> exp.Upper | exp.Hex: 88 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 89 arg = seq_get(args, 0) 90 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 91 92 93def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 94 def _builder(args: t.List, dialect: Dialect) -> E: 95 expression = expr_type( 96 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 97 ) 98 if len(args) > 2 and expr_type is exp.JSONExtract: 99 expression.set("expressions", args[2:]) 100 101 return expression 102 103 return _builder 104 105 106def build_mod(args: t.List) -> exp.Mod: 107 this = seq_get(args, 0) 108 expression = seq_get(args, 1) 109 110 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 111 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 112 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 113 114 return exp.Mod(this=this, expression=expression) 115 116 117def build_pad(args: t.List, is_left: bool = True): 118 return exp.Pad( 119 this=seq_get(args, 0), 120 expression=seq_get(args, 1), 121 fill_pattern=seq_get(args, 2), 122 is_left=is_left, 123 ) 124 125 126def build_array_constructor( 127 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 128) -> exp.Expression: 129 array_exp = exp_class(expressions=args) 130 131 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 132 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 133 134 return array_exp 135 136 137def build_convert_timezone( 138 args: t.List, default_source_tz: t.Optional[str] = None 139) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 140 if len(args) == 2: 141 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 142 return exp.ConvertTimezone( 143 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 144 ) 145 146 return exp.ConvertTimezone.from_arg_list(args) 147 148 149def build_trim(args: t.List, is_left: bool = True): 150 return exp.Trim( 151 this=seq_get(args, 0), 152 expression=seq_get(args, 1), 153 position="LEADING" if is_left else "TRAILING", 154 ) 155 156 157def build_coalesce( 158 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 159) -> exp.Coalesce: 160 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 161 162 163def build_locate_strposition(args: t.List): 164 return exp.StrPosition( 165 this=seq_get(args, 1), 166 substr=seq_get(args, 0), 167 position=seq_get(args, 2), 168 ) 169 170 171class _Parser(type): 172 def __new__(cls, clsname, bases, attrs): 173 klass = super().__new__(cls, clsname, bases, attrs) 174 175 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 176 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 177 178 return klass 179 180 181class Parser(metaclass=_Parser): 182 """ 183 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 184 185 Args: 186 error_level: The desired error level. 187 Default: ErrorLevel.IMMEDIATE 188 error_message_context: The amount of context to capture from a query string when displaying 189 the error message (in number of characters). 190 Default: 100 191 max_errors: Maximum number of error messages to include in a raised ParseError. 192 This is only relevant if error_level is ErrorLevel.RAISE. 193 Default: 3 194 """ 195 196 FUNCTIONS: t.Dict[str, t.Callable] = { 197 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 198 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 199 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 200 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 204 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 205 ), 206 "CHAR": lambda args: exp.Chr(expressions=args), 207 "CHR": lambda args: exp.Chr(expressions=args), 208 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 209 "CONCAT": lambda args, dialect: exp.Concat( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 215 expressions=args, 216 safe=not dialect.STRICT_STRING_CONCAT, 217 coalesce=dialect.CONCAT_COALESCE, 218 ), 219 "CONVERT_TIMEZONE": build_convert_timezone, 220 "DATE_TO_DATE_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 225 start=seq_get(args, 0), 226 end=seq_get(args, 1), 227 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 228 ), 229 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 230 "HEX": build_hex, 231 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 232 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 233 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 234 "LIKE": build_like, 235 "LOG": build_logarithm, 236 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 237 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 238 "LOWER": build_lower, 239 "LPAD": lambda args: build_pad(args), 240 "LEFTPAD": lambda args: build_pad(args), 241 "LTRIM": lambda args: build_trim(args), 242 "MOD": build_mod, 243 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 244 "RPAD": lambda args: build_pad(args, is_left=False), 245 "RTRIM": lambda args: build_trim(args, is_left=False), 246 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 247 if len(args) != 2 248 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 249 "STRPOS": exp.StrPosition.from_arg_list, 250 "CHARINDEX": lambda args: build_locate_strposition(args), 251 "INSTR": exp.StrPosition.from_arg_list, 252 "LOCATE": lambda args: build_locate_strposition(args), 253 "TIME_TO_TIME_STR": lambda args: exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 "TO_HEX": build_hex, 258 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 259 this=exp.Cast( 260 this=seq_get(args, 0), 261 to=exp.DataType(this=exp.DataType.Type.TEXT), 262 ), 263 start=exp.Literal.number(1), 264 length=exp.Literal.number(10), 265 ), 266 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 267 "UPPER": build_upper, 268 "VAR_MAP": build_var_map, 269 } 270 271 NO_PAREN_FUNCTIONS = { 272 TokenType.CURRENT_DATE: exp.CurrentDate, 273 TokenType.CURRENT_DATETIME: exp.CurrentDate, 274 TokenType.CURRENT_TIME: exp.CurrentTime, 275 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 276 TokenType.CURRENT_USER: exp.CurrentUser, 277 } 278 279 STRUCT_TYPE_TOKENS = { 280 TokenType.NESTED, 281 TokenType.OBJECT, 282 TokenType.STRUCT, 283 TokenType.UNION, 284 } 285 286 NESTED_TYPE_TOKENS = { 287 TokenType.ARRAY, 288 TokenType.LIST, 289 TokenType.LOWCARDINALITY, 290 TokenType.MAP, 291 TokenType.NULLABLE, 292 TokenType.RANGE, 293 *STRUCT_TYPE_TOKENS, 294 } 295 296 ENUM_TYPE_TOKENS = { 297 TokenType.DYNAMIC, 298 TokenType.ENUM, 299 TokenType.ENUM8, 300 TokenType.ENUM16, 301 } 302 303 AGGREGATE_TYPE_TOKENS = { 304 TokenType.AGGREGATEFUNCTION, 305 TokenType.SIMPLEAGGREGATEFUNCTION, 306 } 307 308 TYPE_TOKENS = { 309 TokenType.BIT, 310 TokenType.BOOLEAN, 311 TokenType.TINYINT, 312 TokenType.UTINYINT, 313 TokenType.SMALLINT, 314 TokenType.USMALLINT, 315 TokenType.INT, 316 TokenType.UINT, 317 TokenType.BIGINT, 318 TokenType.UBIGINT, 319 TokenType.INT128, 320 TokenType.UINT128, 321 TokenType.INT256, 322 TokenType.UINT256, 323 TokenType.MEDIUMINT, 324 TokenType.UMEDIUMINT, 325 TokenType.FIXEDSTRING, 326 TokenType.FLOAT, 327 TokenType.DOUBLE, 328 TokenType.UDOUBLE, 329 TokenType.CHAR, 330 TokenType.NCHAR, 331 TokenType.VARCHAR, 332 TokenType.NVARCHAR, 333 TokenType.BPCHAR, 334 TokenType.TEXT, 335 TokenType.MEDIUMTEXT, 336 TokenType.LONGTEXT, 337 TokenType.BLOB, 338 TokenType.MEDIUMBLOB, 339 TokenType.LONGBLOB, 340 TokenType.BINARY, 341 TokenType.VARBINARY, 342 TokenType.JSON, 343 TokenType.JSONB, 344 TokenType.INTERVAL, 345 TokenType.TINYBLOB, 346 TokenType.TINYTEXT, 347 TokenType.TIME, 348 TokenType.TIMETZ, 349 TokenType.TIMESTAMP, 350 TokenType.TIMESTAMP_S, 351 TokenType.TIMESTAMP_MS, 352 TokenType.TIMESTAMP_NS, 353 TokenType.TIMESTAMPTZ, 354 TokenType.TIMESTAMPLTZ, 355 TokenType.TIMESTAMPNTZ, 356 TokenType.DATETIME, 357 TokenType.DATETIME2, 358 TokenType.DATETIME64, 359 TokenType.SMALLDATETIME, 360 TokenType.DATE, 361 TokenType.DATE32, 362 TokenType.INT4RANGE, 363 TokenType.INT4MULTIRANGE, 364 TokenType.INT8RANGE, 365 TokenType.INT8MULTIRANGE, 366 TokenType.NUMRANGE, 367 TokenType.NUMMULTIRANGE, 368 TokenType.TSRANGE, 369 TokenType.TSMULTIRANGE, 370 TokenType.TSTZRANGE, 371 TokenType.TSTZMULTIRANGE, 372 TokenType.DATERANGE, 373 TokenType.DATEMULTIRANGE, 374 TokenType.DECIMAL, 375 TokenType.DECIMAL32, 376 TokenType.DECIMAL64, 377 TokenType.DECIMAL128, 378 TokenType.DECIMAL256, 379 TokenType.UDECIMAL, 380 TokenType.BIGDECIMAL, 381 TokenType.UUID, 382 TokenType.GEOGRAPHY, 383 TokenType.GEOMETRY, 384 TokenType.POINT, 385 TokenType.RING, 386 TokenType.LINESTRING, 387 TokenType.MULTILINESTRING, 388 TokenType.POLYGON, 389 TokenType.MULTIPOLYGON, 390 TokenType.HLLSKETCH, 391 TokenType.HSTORE, 392 TokenType.PSEUDO_TYPE, 393 TokenType.SUPER, 394 TokenType.SERIAL, 395 TokenType.SMALLSERIAL, 396 TokenType.BIGSERIAL, 397 TokenType.XML, 398 TokenType.YEAR, 399 TokenType.USERDEFINED, 400 TokenType.MONEY, 401 TokenType.SMALLMONEY, 402 TokenType.ROWVERSION, 403 TokenType.IMAGE, 404 TokenType.VARIANT, 405 TokenType.VECTOR, 406 TokenType.VOID, 407 TokenType.OBJECT, 408 TokenType.OBJECT_IDENTIFIER, 409 TokenType.INET, 410 TokenType.IPADDRESS, 411 TokenType.IPPREFIX, 412 TokenType.IPV4, 413 TokenType.IPV6, 414 TokenType.UNKNOWN, 415 TokenType.NOTHING, 416 TokenType.NULL, 417 TokenType.NAME, 418 TokenType.TDIGEST, 419 TokenType.DYNAMIC, 420 *ENUM_TYPE_TOKENS, 421 *NESTED_TYPE_TOKENS, 422 *AGGREGATE_TYPE_TOKENS, 423 } 424 425 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 426 TokenType.BIGINT: TokenType.UBIGINT, 427 TokenType.INT: TokenType.UINT, 428 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 429 TokenType.SMALLINT: TokenType.USMALLINT, 430 TokenType.TINYINT: TokenType.UTINYINT, 431 TokenType.DECIMAL: TokenType.UDECIMAL, 432 TokenType.DOUBLE: TokenType.UDOUBLE, 433 } 434 435 SUBQUERY_PREDICATES = { 436 TokenType.ANY: exp.Any, 437 TokenType.ALL: exp.All, 438 TokenType.EXISTS: exp.Exists, 439 TokenType.SOME: exp.Any, 440 } 441 442 RESERVED_TOKENS = { 443 *Tokenizer.SINGLE_TOKENS.values(), 444 TokenType.SELECT, 445 } - {TokenType.IDENTIFIER} 446 447 DB_CREATABLES = { 448 TokenType.DATABASE, 449 TokenType.DICTIONARY, 450 TokenType.FILE_FORMAT, 451 TokenType.MODEL, 452 TokenType.NAMESPACE, 453 TokenType.SCHEMA, 454 TokenType.SEQUENCE, 455 TokenType.SINK, 456 TokenType.SOURCE, 457 TokenType.STAGE, 458 TokenType.STORAGE_INTEGRATION, 459 TokenType.STREAMLIT, 460 TokenType.TABLE, 461 TokenType.TAG, 462 TokenType.VIEW, 463 TokenType.WAREHOUSE, 464 } 465 466 CREATABLES = { 467 TokenType.COLUMN, 468 TokenType.CONSTRAINT, 469 TokenType.FOREIGN_KEY, 470 TokenType.FUNCTION, 471 TokenType.INDEX, 472 TokenType.PROCEDURE, 473 *DB_CREATABLES, 474 } 475 476 ALTERABLES = { 477 TokenType.INDEX, 478 TokenType.TABLE, 479 TokenType.VIEW, 480 } 481 482 # Tokens that can represent identifiers 483 ID_VAR_TOKENS = { 484 TokenType.ALL, 485 TokenType.ATTACH, 486 TokenType.VAR, 487 TokenType.ANTI, 488 TokenType.APPLY, 489 TokenType.ASC, 490 TokenType.ASOF, 491 TokenType.AUTO_INCREMENT, 492 TokenType.BEGIN, 493 TokenType.BPCHAR, 494 TokenType.CACHE, 495 TokenType.CASE, 496 TokenType.COLLATE, 497 TokenType.COMMAND, 498 TokenType.COMMENT, 499 TokenType.COMMIT, 500 TokenType.CONSTRAINT, 501 TokenType.COPY, 502 TokenType.CUBE, 503 TokenType.CURRENT_SCHEMA, 504 TokenType.DEFAULT, 505 TokenType.DELETE, 506 TokenType.DESC, 507 TokenType.DESCRIBE, 508 TokenType.DETACH, 509 TokenType.DICTIONARY, 510 TokenType.DIV, 511 TokenType.END, 512 TokenType.EXECUTE, 513 TokenType.EXPORT, 514 TokenType.ESCAPE, 515 TokenType.FALSE, 516 TokenType.FIRST, 517 TokenType.FILTER, 518 TokenType.FINAL, 519 TokenType.FORMAT, 520 TokenType.FULL, 521 TokenType.GET, 522 TokenType.IDENTIFIER, 523 TokenType.IS, 524 TokenType.ISNULL, 525 TokenType.INTERVAL, 526 TokenType.KEEP, 527 TokenType.KILL, 528 TokenType.LEFT, 529 TokenType.LIMIT, 530 TokenType.LOAD, 531 TokenType.MERGE, 532 TokenType.NATURAL, 533 TokenType.NEXT, 534 TokenType.OFFSET, 535 TokenType.OPERATOR, 536 TokenType.ORDINALITY, 537 TokenType.OVERLAPS, 538 TokenType.OVERWRITE, 539 TokenType.PARTITION, 540 TokenType.PERCENT, 541 TokenType.PIVOT, 542 TokenType.PRAGMA, 543 TokenType.PUT, 544 TokenType.RANGE, 545 TokenType.RECURSIVE, 546 TokenType.REFERENCES, 547 TokenType.REFRESH, 548 TokenType.RENAME, 549 TokenType.REPLACE, 550 TokenType.RIGHT, 551 TokenType.ROLLUP, 552 TokenType.ROW, 553 TokenType.ROWS, 554 TokenType.SEMI, 555 TokenType.SET, 556 TokenType.SETTINGS, 557 TokenType.SHOW, 558 TokenType.TEMPORARY, 559 TokenType.TOP, 560 TokenType.TRUE, 561 TokenType.TRUNCATE, 562 TokenType.UNIQUE, 563 TokenType.UNNEST, 564 TokenType.UNPIVOT, 565 TokenType.UPDATE, 566 TokenType.USE, 567 TokenType.VOLATILE, 568 TokenType.WINDOW, 569 *CREATABLES, 570 *SUBQUERY_PREDICATES, 571 *TYPE_TOKENS, 572 *NO_PAREN_FUNCTIONS, 573 } 574 ID_VAR_TOKENS.remove(TokenType.UNION) 575 576 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 577 TokenType.ANTI, 578 TokenType.APPLY, 579 TokenType.ASOF, 580 TokenType.FULL, 581 TokenType.LEFT, 582 TokenType.LOCK, 583 TokenType.NATURAL, 584 TokenType.RIGHT, 585 TokenType.SEMI, 586 TokenType.WINDOW, 587 } 588 589 ALIAS_TOKENS = ID_VAR_TOKENS 590 591 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 592 593 ARRAY_CONSTRUCTORS = { 594 "ARRAY": exp.Array, 595 "LIST": exp.List, 596 } 597 598 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 599 600 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 601 602 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 603 604 FUNC_TOKENS = { 605 TokenType.COLLATE, 606 TokenType.COMMAND, 607 TokenType.CURRENT_DATE, 608 TokenType.CURRENT_DATETIME, 609 TokenType.CURRENT_SCHEMA, 610 TokenType.CURRENT_TIMESTAMP, 611 TokenType.CURRENT_TIME, 612 TokenType.CURRENT_USER, 613 TokenType.FILTER, 614 TokenType.FIRST, 615 TokenType.FORMAT, 616 TokenType.GET, 617 TokenType.GLOB, 618 TokenType.IDENTIFIER, 619 TokenType.INDEX, 620 TokenType.ISNULL, 621 TokenType.ILIKE, 622 TokenType.INSERT, 623 TokenType.LIKE, 624 TokenType.MERGE, 625 TokenType.NEXT, 626 TokenType.OFFSET, 627 TokenType.PRIMARY_KEY, 628 TokenType.RANGE, 629 TokenType.REPLACE, 630 TokenType.RLIKE, 631 TokenType.ROW, 632 TokenType.UNNEST, 633 TokenType.VAR, 634 TokenType.LEFT, 635 TokenType.RIGHT, 636 TokenType.SEQUENCE, 637 TokenType.DATE, 638 TokenType.DATETIME, 639 TokenType.TABLE, 640 TokenType.TIMESTAMP, 641 TokenType.TIMESTAMPTZ, 642 TokenType.TRUNCATE, 643 TokenType.WINDOW, 644 TokenType.XOR, 645 *TYPE_TOKENS, 646 *SUBQUERY_PREDICATES, 647 } 648 649 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 650 TokenType.AND: exp.And, 651 } 652 653 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 654 TokenType.COLON_EQ: exp.PropertyEQ, 655 } 656 657 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.OR: exp.Or, 659 } 660 661 EQUALITY = { 662 TokenType.EQ: exp.EQ, 663 TokenType.NEQ: exp.NEQ, 664 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 665 } 666 667 COMPARISON = { 668 TokenType.GT: exp.GT, 669 TokenType.GTE: exp.GTE, 670 TokenType.LT: exp.LT, 671 TokenType.LTE: exp.LTE, 672 } 673 674 BITWISE = { 675 TokenType.AMP: exp.BitwiseAnd, 676 TokenType.CARET: exp.BitwiseXor, 677 TokenType.PIPE: exp.BitwiseOr, 678 } 679 680 TERM = { 681 TokenType.DASH: exp.Sub, 682 TokenType.PLUS: exp.Add, 683 TokenType.MOD: exp.Mod, 684 TokenType.COLLATE: exp.Collate, 685 } 686 687 FACTOR = { 688 TokenType.DIV: exp.IntDiv, 689 TokenType.LR_ARROW: exp.Distance, 690 TokenType.SLASH: exp.Div, 691 TokenType.STAR: exp.Mul, 692 } 693 694 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 695 696 TIMES = { 697 TokenType.TIME, 698 TokenType.TIMETZ, 699 } 700 701 TIMESTAMPS = { 702 TokenType.TIMESTAMP, 703 TokenType.TIMESTAMPNTZ, 704 TokenType.TIMESTAMPTZ, 705 TokenType.TIMESTAMPLTZ, 706 *TIMES, 707 } 708 709 SET_OPERATIONS = { 710 TokenType.UNION, 711 TokenType.INTERSECT, 712 TokenType.EXCEPT, 713 } 714 715 JOIN_METHODS = { 716 TokenType.ASOF, 717 TokenType.NATURAL, 718 TokenType.POSITIONAL, 719 } 720 721 JOIN_SIDES = { 722 TokenType.LEFT, 723 TokenType.RIGHT, 724 TokenType.FULL, 725 } 726 727 JOIN_KINDS = { 728 TokenType.ANTI, 729 TokenType.CROSS, 730 TokenType.INNER, 731 TokenType.OUTER, 732 TokenType.SEMI, 733 TokenType.STRAIGHT_JOIN, 734 } 735 736 JOIN_HINTS: t.Set[str] = set() 737 738 LAMBDAS = { 739 TokenType.ARROW: lambda self, expressions: self.expression( 740 exp.Lambda, 741 this=self._replace_lambda( 742 self._parse_assignment(), 743 expressions, 744 ), 745 expressions=expressions, 746 ), 747 TokenType.FARROW: lambda self, expressions: self.expression( 748 exp.Kwarg, 749 this=exp.var(expressions[0].name), 750 expression=self._parse_assignment(), 751 ), 752 } 753 754 COLUMN_OPERATORS = { 755 TokenType.DOT: None, 756 TokenType.DOTCOLON: lambda self, this, to: self.expression( 757 exp.JSONCast, 758 this=this, 759 to=to, 760 ), 761 TokenType.DCOLON: lambda self, this, to: self.expression( 762 exp.Cast if self.STRICT_CAST else exp.TryCast, 763 this=this, 764 to=to, 765 ), 766 TokenType.ARROW: lambda self, this, path: self.expression( 767 exp.JSONExtract, 768 this=this, 769 expression=self.dialect.to_json_path(path), 770 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 771 ), 772 TokenType.DARROW: lambda self, this, path: self.expression( 773 exp.JSONExtractScalar, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 779 exp.JSONBExtract, 780 this=this, 781 expression=path, 782 ), 783 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtractScalar, 785 this=this, 786 expression=path, 787 ), 788 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 789 exp.JSONBContains, 790 this=this, 791 expression=key, 792 ), 793 } 794 795 EXPRESSION_PARSERS = { 796 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 797 exp.Column: lambda self: self._parse_column(), 798 exp.Condition: lambda self: self._parse_assignment(), 799 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 800 exp.Expression: lambda self: self._parse_expression(), 801 exp.From: lambda self: self._parse_from(joins=True), 802 exp.Group: lambda self: self._parse_group(), 803 exp.Having: lambda self: self._parse_having(), 804 exp.Hint: lambda self: self._parse_hint_body(), 805 exp.Identifier: lambda self: self._parse_id_var(), 806 exp.Join: lambda self: self._parse_join(), 807 exp.Lambda: lambda self: self._parse_lambda(), 808 exp.Lateral: lambda self: self._parse_lateral(), 809 exp.Limit: lambda self: self._parse_limit(), 810 exp.Offset: lambda self: self._parse_offset(), 811 exp.Order: lambda self: self._parse_order(), 812 exp.Ordered: lambda self: self._parse_ordered(), 813 exp.Properties: lambda self: self._parse_properties(), 814 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 815 exp.Qualify: lambda self: self._parse_qualify(), 816 exp.Returning: lambda self: self._parse_returning(), 817 exp.Select: lambda self: self._parse_select(), 818 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 819 exp.Table: lambda self: self._parse_table_parts(), 820 exp.TableAlias: lambda self: self._parse_table_alias(), 821 exp.Tuple: lambda self: self._parse_value(values=False), 822 exp.Whens: lambda self: self._parse_when_matched(), 823 exp.Where: lambda self: self._parse_where(), 824 exp.Window: lambda self: self._parse_named_window(), 825 exp.With: lambda self: self._parse_with(), 826 "JOIN_TYPE": lambda self: self._parse_join_parts(), 827 } 828 829 STATEMENT_PARSERS = { 830 TokenType.ALTER: lambda self: self._parse_alter(), 831 TokenType.ANALYZE: lambda self: self._parse_analyze(), 832 TokenType.BEGIN: lambda self: self._parse_transaction(), 833 TokenType.CACHE: lambda self: self._parse_cache(), 834 TokenType.COMMENT: lambda self: self._parse_comment(), 835 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 836 TokenType.COPY: lambda self: self._parse_copy(), 837 TokenType.CREATE: lambda self: self._parse_create(), 838 TokenType.DELETE: lambda self: self._parse_delete(), 839 TokenType.DESC: lambda self: self._parse_describe(), 840 TokenType.DESCRIBE: lambda self: self._parse_describe(), 841 TokenType.DROP: lambda self: self._parse_drop(), 842 TokenType.GRANT: lambda self: self._parse_grant(), 843 TokenType.INSERT: lambda self: self._parse_insert(), 844 TokenType.KILL: lambda self: self._parse_kill(), 845 TokenType.LOAD: lambda self: self._parse_load(), 846 TokenType.MERGE: lambda self: self._parse_merge(), 847 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 848 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 849 TokenType.REFRESH: lambda self: self._parse_refresh(), 850 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 851 TokenType.SET: lambda self: self._parse_set(), 852 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 853 TokenType.UNCACHE: lambda self: self._parse_uncache(), 854 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 855 TokenType.UPDATE: lambda self: self._parse_update(), 856 TokenType.USE: lambda self: self._parse_use(), 857 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 858 } 859 860 UNARY_PARSERS = { 861 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 862 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 863 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 864 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 865 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 866 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 867 } 868 869 STRING_PARSERS = { 870 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 871 exp.RawString, this=token.text 872 ), 873 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 874 exp.National, this=token.text 875 ), 876 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 877 TokenType.STRING: lambda self, token: self.expression( 878 exp.Literal, this=token.text, is_string=True 879 ), 880 TokenType.UNICODE_STRING: lambda self, token: self.expression( 881 exp.UnicodeString, 882 this=token.text, 883 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 884 ), 885 } 886 887 NUMERIC_PARSERS = { 888 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 889 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 890 TokenType.HEX_STRING: lambda self, token: self.expression( 891 exp.HexString, 892 this=token.text, 893 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 894 ), 895 TokenType.NUMBER: lambda self, token: self.expression( 896 exp.Literal, this=token.text, is_string=False 897 ), 898 } 899 900 PRIMARY_PARSERS = { 901 **STRING_PARSERS, 902 **NUMERIC_PARSERS, 903 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 904 TokenType.NULL: lambda self, _: self.expression(exp.Null), 905 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 906 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 907 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 908 TokenType.STAR: lambda self, _: self._parse_star_ops(), 909 } 910 911 PLACEHOLDER_PARSERS = { 912 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 913 TokenType.PARAMETER: lambda self: self._parse_parameter(), 914 TokenType.COLON: lambda self: ( 915 self.expression(exp.Placeholder, this=self._prev.text) 916 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 917 else None 918 ), 919 } 920 921 RANGE_PARSERS = { 922 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 923 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 924 TokenType.GLOB: binary_range_parser(exp.Glob), 925 TokenType.ILIKE: binary_range_parser(exp.ILike), 926 TokenType.IN: lambda self, this: self._parse_in(this), 927 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 928 TokenType.IS: lambda self, this: self._parse_is(this), 929 TokenType.LIKE: binary_range_parser(exp.Like), 930 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 931 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 932 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 933 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 934 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 935 } 936 937 PIPE_SYNTAX_TRANSFORM_PARSERS = { 938 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 939 "AS": lambda self, query: self._build_pipe_cte( 940 query, [exp.Star()], self._parse_table_alias() 941 ), 942 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 943 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 944 "ORDER BY": lambda self, query: query.order_by( 945 self._parse_order(), append=False, copy=False 946 ), 947 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 948 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 949 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 950 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 951 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 952 } 953 954 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 955 "ALLOWED_VALUES": lambda self: self.expression( 956 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 957 ), 958 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 959 "AUTO": lambda self: self._parse_auto_property(), 960 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 961 "BACKUP": lambda self: self.expression( 962 exp.BackupProperty, this=self._parse_var(any_token=True) 963 ), 964 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 965 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 966 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 967 "CHECKSUM": lambda self: self._parse_checksum(), 968 "CLUSTER BY": lambda self: self._parse_cluster(), 969 "CLUSTERED": lambda self: self._parse_clustered_by(), 970 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 971 exp.CollateProperty, **kwargs 972 ), 973 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 974 "CONTAINS": lambda self: self._parse_contains_property(), 975 "COPY": lambda self: self._parse_copy_property(), 976 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 977 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 978 "DEFINER": lambda self: self._parse_definer(), 979 "DETERMINISTIC": lambda self: self.expression( 980 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 981 ), 982 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 983 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 984 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 985 "DISTKEY": lambda self: self._parse_distkey(), 986 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 987 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 988 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 989 "ENVIRONMENT": lambda self: self.expression( 990 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 991 ), 992 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 993 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 994 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 995 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 996 "FREESPACE": lambda self: self._parse_freespace(), 997 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 998 "HEAP": lambda self: self.expression(exp.HeapProperty), 999 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1000 "IMMUTABLE": lambda self: self.expression( 1001 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1002 ), 1003 "INHERITS": lambda self: self.expression( 1004 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1005 ), 1006 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1007 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1008 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1009 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1010 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1011 "LIKE": lambda self: self._parse_create_like(), 1012 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1013 "LOCK": lambda self: self._parse_locking(), 1014 "LOCKING": lambda self: self._parse_locking(), 1015 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1016 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1017 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1018 "MODIFIES": lambda self: self._parse_modifies_property(), 1019 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1020 "NO": lambda self: self._parse_no_property(), 1021 "ON": lambda self: self._parse_on_property(), 1022 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1023 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1024 "PARTITION": lambda self: self._parse_partitioned_of(), 1025 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1026 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1027 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1028 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1029 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1030 "READS": lambda self: self._parse_reads_property(), 1031 "REMOTE": lambda self: self._parse_remote_with_connection(), 1032 "RETURNS": lambda self: self._parse_returns(), 1033 "STRICT": lambda self: self.expression(exp.StrictProperty), 1034 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1035 "ROW": lambda self: self._parse_row(), 1036 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1037 "SAMPLE": lambda self: self.expression( 1038 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1039 ), 1040 "SECURE": lambda self: self.expression(exp.SecureProperty), 1041 "SECURITY": lambda self: self._parse_security(), 1042 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1043 "SETTINGS": lambda self: self._parse_settings_property(), 1044 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1045 "SORTKEY": lambda self: self._parse_sortkey(), 1046 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1047 "STABLE": lambda self: self.expression( 1048 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1049 ), 1050 "STORED": lambda self: self._parse_stored(), 1051 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1052 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1053 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1054 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1055 "TO": lambda self: self._parse_to_table(), 1056 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1057 "TRANSFORM": lambda self: self.expression( 1058 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1059 ), 1060 "TTL": lambda self: self._parse_ttl(), 1061 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1062 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1063 "VOLATILE": lambda self: self._parse_volatile_property(), 1064 "WITH": lambda self: self._parse_with_property(), 1065 } 1066 1067 CONSTRAINT_PARSERS = { 1068 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1069 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1070 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1071 "CHARACTER SET": lambda self: self.expression( 1072 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1073 ), 1074 "CHECK": lambda self: self.expression( 1075 exp.CheckColumnConstraint, 1076 this=self._parse_wrapped(self._parse_assignment), 1077 enforced=self._match_text_seq("ENFORCED"), 1078 ), 1079 "COLLATE": lambda self: self.expression( 1080 exp.CollateColumnConstraint, 1081 this=self._parse_identifier() or self._parse_column(), 1082 ), 1083 "COMMENT": lambda self: self.expression( 1084 exp.CommentColumnConstraint, this=self._parse_string() 1085 ), 1086 "COMPRESS": lambda self: self._parse_compress(), 1087 "CLUSTERED": lambda self: self.expression( 1088 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1089 ), 1090 "NONCLUSTERED": lambda self: self.expression( 1091 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1092 ), 1093 "DEFAULT": lambda self: self.expression( 1094 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1095 ), 1096 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1097 "EPHEMERAL": lambda self: self.expression( 1098 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1099 ), 1100 "EXCLUDE": lambda self: self.expression( 1101 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1102 ), 1103 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1104 "FORMAT": lambda self: self.expression( 1105 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1106 ), 1107 "GENERATED": lambda self: self._parse_generated_as_identity(), 1108 "IDENTITY": lambda self: self._parse_auto_increment(), 1109 "INLINE": lambda self: self._parse_inline(), 1110 "LIKE": lambda self: self._parse_create_like(), 1111 "NOT": lambda self: self._parse_not_constraint(), 1112 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1113 "ON": lambda self: ( 1114 self._match(TokenType.UPDATE) 1115 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1116 ) 1117 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1118 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1119 "PERIOD": lambda self: self._parse_period_for_system_time(), 1120 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1121 "REFERENCES": lambda self: self._parse_references(match=False), 1122 "TITLE": lambda self: self.expression( 1123 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1124 ), 1125 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1126 "UNIQUE": lambda self: self._parse_unique(), 1127 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1128 "WATERMARK": lambda self: self.expression( 1129 exp.WatermarkColumnConstraint, 1130 this=self._match(TokenType.FOR) and self._parse_column(), 1131 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1132 ), 1133 "WITH": lambda self: self.expression( 1134 exp.Properties, expressions=self._parse_wrapped_properties() 1135 ), 1136 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1137 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1138 } 1139 1140 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1141 klass = ( 1142 exp.PartitionedByBucket 1143 if self._prev.text.upper() == "BUCKET" 1144 else exp.PartitionByTruncate 1145 ) 1146 1147 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1148 this, expression = seq_get(args, 0), seq_get(args, 1) 1149 1150 if isinstance(this, exp.Literal): 1151 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1152 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1153 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1154 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1155 # 1156 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1157 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1158 this, expression = expression, this 1159 1160 return self.expression(klass, this=this, expression=expression) 1161 1162 ALTER_PARSERS = { 1163 "ADD": lambda self: self._parse_alter_table_add(), 1164 "AS": lambda self: self._parse_select(), 1165 "ALTER": lambda self: self._parse_alter_table_alter(), 1166 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1167 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1168 "DROP": lambda self: self._parse_alter_table_drop(), 1169 "RENAME": lambda self: self._parse_alter_table_rename(), 1170 "SET": lambda self: self._parse_alter_table_set(), 1171 "SWAP": lambda self: self.expression( 1172 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1173 ), 1174 } 1175 1176 ALTER_ALTER_PARSERS = { 1177 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1178 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1179 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1180 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1181 } 1182 1183 SCHEMA_UNNAMED_CONSTRAINTS = { 1184 "CHECK", 1185 "EXCLUDE", 1186 "FOREIGN KEY", 1187 "LIKE", 1188 "PERIOD", 1189 "PRIMARY KEY", 1190 "UNIQUE", 1191 "WATERMARK", 1192 "BUCKET", 1193 "TRUNCATE", 1194 } 1195 1196 NO_PAREN_FUNCTION_PARSERS = { 1197 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1198 "CASE": lambda self: self._parse_case(), 1199 "CONNECT_BY_ROOT": lambda self: self.expression( 1200 exp.ConnectByRoot, this=self._parse_column() 1201 ), 1202 "IF": lambda self: self._parse_if(), 1203 } 1204 1205 INVALID_FUNC_NAME_TOKENS = { 1206 TokenType.IDENTIFIER, 1207 TokenType.STRING, 1208 } 1209 1210 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1211 1212 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1213 1214 FUNCTION_PARSERS = { 1215 **{ 1216 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1217 }, 1218 **{ 1219 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1220 }, 1221 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1222 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1223 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1224 "DECODE": lambda self: self._parse_decode(), 1225 "EXTRACT": lambda self: self._parse_extract(), 1226 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1227 "GAP_FILL": lambda self: self._parse_gap_fill(), 1228 "JSON_OBJECT": lambda self: self._parse_json_object(), 1229 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1230 "JSON_TABLE": lambda self: self._parse_json_table(), 1231 "MATCH": lambda self: self._parse_match_against(), 1232 "NORMALIZE": lambda self: self._parse_normalize(), 1233 "OPENJSON": lambda self: self._parse_open_json(), 1234 "OVERLAY": lambda self: self._parse_overlay(), 1235 "POSITION": lambda self: self._parse_position(), 1236 "PREDICT": lambda self: self._parse_predict(), 1237 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1238 "STRING_AGG": lambda self: self._parse_string_agg(), 1239 "SUBSTRING": lambda self: self._parse_substring(), 1240 "TRIM": lambda self: self._parse_trim(), 1241 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1242 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1243 "XMLELEMENT": lambda self: self.expression( 1244 exp.XMLElement, 1245 this=self._match_text_seq("NAME") and self._parse_id_var(), 1246 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1247 ), 1248 "XMLTABLE": lambda self: self._parse_xml_table(), 1249 } 1250 1251 QUERY_MODIFIER_PARSERS = { 1252 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1253 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1254 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1255 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1256 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1257 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1258 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1259 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1260 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1261 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1262 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1263 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1264 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1265 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1266 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1267 TokenType.CLUSTER_BY: lambda self: ( 1268 "cluster", 1269 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1270 ), 1271 TokenType.DISTRIBUTE_BY: lambda self: ( 1272 "distribute", 1273 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1274 ), 1275 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1276 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1277 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1278 } 1279 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1280 1281 SET_PARSERS = { 1282 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1283 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1284 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1285 "TRANSACTION": lambda self: self._parse_set_transaction(), 1286 } 1287 1288 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1289 1290 TYPE_LITERAL_PARSERS = { 1291 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1292 } 1293 1294 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1295 1296 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1297 1298 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1299 1300 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1301 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1302 "ISOLATION": ( 1303 ("LEVEL", "REPEATABLE", "READ"), 1304 ("LEVEL", "READ", "COMMITTED"), 1305 ("LEVEL", "READ", "UNCOMITTED"), 1306 ("LEVEL", "SERIALIZABLE"), 1307 ), 1308 "READ": ("WRITE", "ONLY"), 1309 } 1310 1311 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1312 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1313 ) 1314 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1315 1316 CREATE_SEQUENCE: OPTIONS_TYPE = { 1317 "SCALE": ("EXTEND", "NOEXTEND"), 1318 "SHARD": ("EXTEND", "NOEXTEND"), 1319 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1320 **dict.fromkeys( 1321 ( 1322 "SESSION", 1323 "GLOBAL", 1324 "KEEP", 1325 "NOKEEP", 1326 "ORDER", 1327 "NOORDER", 1328 "NOCACHE", 1329 "CYCLE", 1330 "NOCYCLE", 1331 "NOMINVALUE", 1332 "NOMAXVALUE", 1333 "NOSCALE", 1334 "NOSHARD", 1335 ), 1336 tuple(), 1337 ), 1338 } 1339 1340 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1341 1342 USABLES: OPTIONS_TYPE = dict.fromkeys( 1343 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1344 ) 1345 1346 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1347 1348 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1349 "TYPE": ("EVOLUTION",), 1350 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1351 } 1352 1353 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1354 1355 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1356 1357 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1358 "NOT": ("ENFORCED",), 1359 "MATCH": ( 1360 "FULL", 1361 "PARTIAL", 1362 "SIMPLE", 1363 ), 1364 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1365 "USING": ( 1366 "BTREE", 1367 "HASH", 1368 ), 1369 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1370 } 1371 1372 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1373 "NO": ("OTHERS",), 1374 "CURRENT": ("ROW",), 1375 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1376 } 1377 1378 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1379 1380 CLONE_KEYWORDS = {"CLONE", "COPY"} 1381 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1382 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1383 1384 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1385 1386 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1387 1388 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1389 1390 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1391 1392 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1393 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1394 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1395 1396 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1397 1398 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1399 1400 ADD_CONSTRAINT_TOKENS = { 1401 TokenType.CONSTRAINT, 1402 TokenType.FOREIGN_KEY, 1403 TokenType.INDEX, 1404 TokenType.KEY, 1405 TokenType.PRIMARY_KEY, 1406 TokenType.UNIQUE, 1407 } 1408 1409 DISTINCT_TOKENS = {TokenType.DISTINCT} 1410 1411 NULL_TOKENS = {TokenType.NULL} 1412 1413 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1414 1415 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1416 1417 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1418 1419 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1420 1421 ODBC_DATETIME_LITERALS = { 1422 "d": exp.Date, 1423 "t": exp.Time, 1424 "ts": exp.Timestamp, 1425 } 1426 1427 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1428 1429 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1430 1431 # The style options for the DESCRIBE statement 1432 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1433 1434 # The style options for the ANALYZE statement 1435 ANALYZE_STYLES = { 1436 "BUFFER_USAGE_LIMIT", 1437 "FULL", 1438 "LOCAL", 1439 "NO_WRITE_TO_BINLOG", 1440 "SAMPLE", 1441 "SKIP_LOCKED", 1442 "VERBOSE", 1443 } 1444 1445 ANALYZE_EXPRESSION_PARSERS = { 1446 "ALL": lambda self: self._parse_analyze_columns(), 1447 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1448 "DELETE": lambda self: self._parse_analyze_delete(), 1449 "DROP": lambda self: self._parse_analyze_histogram(), 1450 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1451 "LIST": lambda self: self._parse_analyze_list(), 1452 "PREDICATE": lambda self: self._parse_analyze_columns(), 1453 "UPDATE": lambda self: self._parse_analyze_histogram(), 1454 "VALIDATE": lambda self: self._parse_analyze_validate(), 1455 } 1456 1457 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1458 1459 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1460 1461 OPERATION_MODIFIERS: t.Set[str] = set() 1462 1463 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1464 1465 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1466 1467 STRICT_CAST = True 1468 1469 PREFIXED_PIVOT_COLUMNS = False 1470 IDENTIFY_PIVOT_STRINGS = False 1471 1472 LOG_DEFAULTS_TO_LN = False 1473 1474 # Whether the table sample clause expects CSV syntax 1475 TABLESAMPLE_CSV = False 1476 1477 # The default method used for table sampling 1478 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1479 1480 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1481 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1482 1483 # Whether the TRIM function expects the characters to trim as its first argument 1484 TRIM_PATTERN_FIRST = False 1485 1486 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1487 STRING_ALIASES = False 1488 1489 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1490 MODIFIERS_ATTACHED_TO_SET_OP = True 1491 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1492 1493 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1494 NO_PAREN_IF_COMMANDS = True 1495 1496 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1497 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1498 1499 # Whether the `:` operator is used to extract a value from a VARIANT column 1500 COLON_IS_VARIANT_EXTRACT = False 1501 1502 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1503 # If this is True and '(' is not found, the keyword will be treated as an identifier 1504 VALUES_FOLLOWED_BY_PAREN = True 1505 1506 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1507 SUPPORTS_IMPLICIT_UNNEST = False 1508 1509 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1510 INTERVAL_SPANS = True 1511 1512 # Whether a PARTITION clause can follow a table reference 1513 SUPPORTS_PARTITION_SELECTION = False 1514 1515 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1516 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1517 1518 # Whether the 'AS' keyword is optional in the CTE definition syntax 1519 OPTIONAL_ALIAS_TOKEN_CTE = True 1520 1521 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1522 ALTER_RENAME_REQUIRES_COLUMN = True 1523 1524 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1525 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1526 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1527 # as BigQuery, where all joins have the same precedence. 1528 JOINS_HAVE_EQUAL_PRECEDENCE = False 1529 1530 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1531 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1532 1533 # Whether map literals support arbitrary expressions as keys. 1534 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1535 # When False, keys are typically restricted to identifiers. 1536 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1537 1538 __slots__ = ( 1539 "error_level", 1540 "error_message_context", 1541 "max_errors", 1542 "dialect", 1543 "sql", 1544 "errors", 1545 "_tokens", 1546 "_index", 1547 "_curr", 1548 "_next", 1549 "_prev", 1550 "_prev_comments", 1551 "_pipe_cte_counter", 1552 ) 1553 1554 # Autofilled 1555 SHOW_TRIE: t.Dict = {} 1556 SET_TRIE: t.Dict = {} 1557 1558 def __init__( 1559 self, 1560 error_level: t.Optional[ErrorLevel] = None, 1561 error_message_context: int = 100, 1562 max_errors: int = 3, 1563 dialect: DialectType = None, 1564 ): 1565 from sqlglot.dialects import Dialect 1566 1567 self.error_level = error_level or ErrorLevel.IMMEDIATE 1568 self.error_message_context = error_message_context 1569 self.max_errors = max_errors 1570 self.dialect = Dialect.get_or_raise(dialect) 1571 self.reset() 1572 1573 def reset(self): 1574 self.sql = "" 1575 self.errors = [] 1576 self._tokens = [] 1577 self._index = 0 1578 self._curr = None 1579 self._next = None 1580 self._prev = None 1581 self._prev_comments = None 1582 self._pipe_cte_counter = 0 1583 1584 def parse( 1585 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1586 ) -> t.List[t.Optional[exp.Expression]]: 1587 """ 1588 Parses a list of tokens and returns a list of syntax trees, one tree 1589 per parsed SQL statement. 1590 1591 Args: 1592 raw_tokens: The list of tokens. 1593 sql: The original SQL string, used to produce helpful debug messages. 1594 1595 Returns: 1596 The list of the produced syntax trees. 1597 """ 1598 return self._parse( 1599 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1600 ) 1601 1602 def parse_into( 1603 self, 1604 expression_types: exp.IntoType, 1605 raw_tokens: t.List[Token], 1606 sql: t.Optional[str] = None, 1607 ) -> t.List[t.Optional[exp.Expression]]: 1608 """ 1609 Parses a list of tokens into a given Expression type. If a collection of Expression 1610 types is given instead, this method will try to parse the token list into each one 1611 of them, stopping at the first for which the parsing succeeds. 1612 1613 Args: 1614 expression_types: The expression type(s) to try and parse the token list into. 1615 raw_tokens: The list of tokens. 1616 sql: The original SQL string, used to produce helpful debug messages. 1617 1618 Returns: 1619 The target Expression. 1620 """ 1621 errors = [] 1622 for expression_type in ensure_list(expression_types): 1623 parser = self.EXPRESSION_PARSERS.get(expression_type) 1624 if not parser: 1625 raise TypeError(f"No parser registered for {expression_type}") 1626 1627 try: 1628 return self._parse(parser, raw_tokens, sql) 1629 except ParseError as e: 1630 e.errors[0]["into_expression"] = expression_type 1631 errors.append(e) 1632 1633 raise ParseError( 1634 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1635 errors=merge_errors(errors), 1636 ) from errors[-1] 1637 1638 def _parse( 1639 self, 1640 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1641 raw_tokens: t.List[Token], 1642 sql: t.Optional[str] = None, 1643 ) -> t.List[t.Optional[exp.Expression]]: 1644 self.reset() 1645 self.sql = sql or "" 1646 1647 total = len(raw_tokens) 1648 chunks: t.List[t.List[Token]] = [[]] 1649 1650 for i, token in enumerate(raw_tokens): 1651 if token.token_type == TokenType.SEMICOLON: 1652 if token.comments: 1653 chunks.append([token]) 1654 1655 if i < total - 1: 1656 chunks.append([]) 1657 else: 1658 chunks[-1].append(token) 1659 1660 expressions = [] 1661 1662 for tokens in chunks: 1663 self._index = -1 1664 self._tokens = tokens 1665 self._advance() 1666 1667 expressions.append(parse_method(self)) 1668 1669 if self._index < len(self._tokens): 1670 self.raise_error("Invalid expression / Unexpected token") 1671 1672 self.check_errors() 1673 1674 return expressions 1675 1676 def check_errors(self) -> None: 1677 """Logs or raises any found errors, depending on the chosen error level setting.""" 1678 if self.error_level == ErrorLevel.WARN: 1679 for error in self.errors: 1680 logger.error(str(error)) 1681 elif self.error_level == ErrorLevel.RAISE and self.errors: 1682 raise ParseError( 1683 concat_messages(self.errors, self.max_errors), 1684 errors=merge_errors(self.errors), 1685 ) 1686 1687 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1688 """ 1689 Appends an error in the list of recorded errors or raises it, depending on the chosen 1690 error level setting. 1691 """ 1692 token = token or self._curr or self._prev or Token.string("") 1693 start = token.start 1694 end = token.end + 1 1695 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1696 highlight = self.sql[start:end] 1697 end_context = self.sql[end : end + self.error_message_context] 1698 1699 error = ParseError.new( 1700 f"{message}. Line {token.line}, Col: {token.col}.\n" 1701 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1702 description=message, 1703 line=token.line, 1704 col=token.col, 1705 start_context=start_context, 1706 highlight=highlight, 1707 end_context=end_context, 1708 ) 1709 1710 if self.error_level == ErrorLevel.IMMEDIATE: 1711 raise error 1712 1713 self.errors.append(error) 1714 1715 def expression( 1716 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1717 ) -> E: 1718 """ 1719 Creates a new, validated Expression. 1720 1721 Args: 1722 exp_class: The expression class to instantiate. 1723 comments: An optional list of comments to attach to the expression. 1724 kwargs: The arguments to set for the expression along with their respective values. 1725 1726 Returns: 1727 The target expression. 1728 """ 1729 instance = exp_class(**kwargs) 1730 instance.add_comments(comments) if comments else self._add_comments(instance) 1731 return self.validate_expression(instance) 1732 1733 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1734 if expression and self._prev_comments: 1735 expression.add_comments(self._prev_comments) 1736 self._prev_comments = None 1737 1738 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1739 """ 1740 Validates an Expression, making sure that all its mandatory arguments are set. 1741 1742 Args: 1743 expression: The expression to validate. 1744 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1745 1746 Returns: 1747 The validated expression. 1748 """ 1749 if self.error_level != ErrorLevel.IGNORE: 1750 for error_message in expression.error_messages(args): 1751 self.raise_error(error_message) 1752 1753 return expression 1754 1755 def _find_sql(self, start: Token, end: Token) -> str: 1756 return self.sql[start.start : end.end + 1] 1757 1758 def _is_connected(self) -> bool: 1759 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1760 1761 def _advance(self, times: int = 1) -> None: 1762 self._index += times 1763 self._curr = seq_get(self._tokens, self._index) 1764 self._next = seq_get(self._tokens, self._index + 1) 1765 1766 if self._index > 0: 1767 self._prev = self._tokens[self._index - 1] 1768 self._prev_comments = self._prev.comments 1769 else: 1770 self._prev = None 1771 self._prev_comments = None 1772 1773 def _retreat(self, index: int) -> None: 1774 if index != self._index: 1775 self._advance(index - self._index) 1776 1777 def _warn_unsupported(self) -> None: 1778 if len(self._tokens) <= 1: 1779 return 1780 1781 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1782 # interested in emitting a warning for the one being currently processed. 1783 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1784 1785 logger.warning( 1786 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1787 ) 1788 1789 def _parse_command(self) -> exp.Command: 1790 self._warn_unsupported() 1791 return self.expression( 1792 exp.Command, 1793 comments=self._prev_comments, 1794 this=self._prev.text.upper(), 1795 expression=self._parse_string(), 1796 ) 1797 1798 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1799 """ 1800 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1801 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1802 solve this by setting & resetting the parser state accordingly 1803 """ 1804 index = self._index 1805 error_level = self.error_level 1806 1807 self.error_level = ErrorLevel.IMMEDIATE 1808 try: 1809 this = parse_method() 1810 except ParseError: 1811 this = None 1812 finally: 1813 if not this or retreat: 1814 self._retreat(index) 1815 self.error_level = error_level 1816 1817 return this 1818 1819 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1820 start = self._prev 1821 exists = self._parse_exists() if allow_exists else None 1822 1823 self._match(TokenType.ON) 1824 1825 materialized = self._match_text_seq("MATERIALIZED") 1826 kind = self._match_set(self.CREATABLES) and self._prev 1827 if not kind: 1828 return self._parse_as_command(start) 1829 1830 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1831 this = self._parse_user_defined_function(kind=kind.token_type) 1832 elif kind.token_type == TokenType.TABLE: 1833 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1834 elif kind.token_type == TokenType.COLUMN: 1835 this = self._parse_column() 1836 else: 1837 this = self._parse_id_var() 1838 1839 self._match(TokenType.IS) 1840 1841 return self.expression( 1842 exp.Comment, 1843 this=this, 1844 kind=kind.text, 1845 expression=self._parse_string(), 1846 exists=exists, 1847 materialized=materialized, 1848 ) 1849 1850 def _parse_to_table( 1851 self, 1852 ) -> exp.ToTableProperty: 1853 table = self._parse_table_parts(schema=True) 1854 return self.expression(exp.ToTableProperty, this=table) 1855 1856 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1857 def _parse_ttl(self) -> exp.Expression: 1858 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1859 this = self._parse_bitwise() 1860 1861 if self._match_text_seq("DELETE"): 1862 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1863 if self._match_text_seq("RECOMPRESS"): 1864 return self.expression( 1865 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1866 ) 1867 if self._match_text_seq("TO", "DISK"): 1868 return self.expression( 1869 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1870 ) 1871 if self._match_text_seq("TO", "VOLUME"): 1872 return self.expression( 1873 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1874 ) 1875 1876 return this 1877 1878 expressions = self._parse_csv(_parse_ttl_action) 1879 where = self._parse_where() 1880 group = self._parse_group() 1881 1882 aggregates = None 1883 if group and self._match(TokenType.SET): 1884 aggregates = self._parse_csv(self._parse_set_item) 1885 1886 return self.expression( 1887 exp.MergeTreeTTL, 1888 expressions=expressions, 1889 where=where, 1890 group=group, 1891 aggregates=aggregates, 1892 ) 1893 1894 def _parse_statement(self) -> t.Optional[exp.Expression]: 1895 if self._curr is None: 1896 return None 1897 1898 if self._match_set(self.STATEMENT_PARSERS): 1899 comments = self._prev_comments 1900 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1901 stmt.add_comments(comments, prepend=True) 1902 return stmt 1903 1904 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1905 return self._parse_command() 1906 1907 expression = self._parse_expression() 1908 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1909 return self._parse_query_modifiers(expression) 1910 1911 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1912 start = self._prev 1913 temporary = self._match(TokenType.TEMPORARY) 1914 materialized = self._match_text_seq("MATERIALIZED") 1915 1916 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1917 if not kind: 1918 return self._parse_as_command(start) 1919 1920 concurrently = self._match_text_seq("CONCURRENTLY") 1921 if_exists = exists or self._parse_exists() 1922 1923 if kind == "COLUMN": 1924 this = self._parse_column() 1925 else: 1926 this = self._parse_table_parts( 1927 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1928 ) 1929 1930 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1931 1932 if self._match(TokenType.L_PAREN, advance=False): 1933 expressions = self._parse_wrapped_csv(self._parse_types) 1934 else: 1935 expressions = None 1936 1937 return self.expression( 1938 exp.Drop, 1939 exists=if_exists, 1940 this=this, 1941 expressions=expressions, 1942 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1943 temporary=temporary, 1944 materialized=materialized, 1945 cascade=self._match_text_seq("CASCADE"), 1946 constraints=self._match_text_seq("CONSTRAINTS"), 1947 purge=self._match_text_seq("PURGE"), 1948 cluster=cluster, 1949 concurrently=concurrently, 1950 ) 1951 1952 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1953 return ( 1954 self._match_text_seq("IF") 1955 and (not not_ or self._match(TokenType.NOT)) 1956 and self._match(TokenType.EXISTS) 1957 ) 1958 1959 def _parse_create(self) -> exp.Create | exp.Command: 1960 # Note: this can't be None because we've matched a statement parser 1961 start = self._prev 1962 1963 replace = ( 1964 start.token_type == TokenType.REPLACE 1965 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1966 or self._match_pair(TokenType.OR, TokenType.ALTER) 1967 ) 1968 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1969 1970 unique = self._match(TokenType.UNIQUE) 1971 1972 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1973 clustered = True 1974 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1975 "COLUMNSTORE" 1976 ): 1977 clustered = False 1978 else: 1979 clustered = None 1980 1981 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1982 self._advance() 1983 1984 properties = None 1985 create_token = self._match_set(self.CREATABLES) and self._prev 1986 1987 if not create_token: 1988 # exp.Properties.Location.POST_CREATE 1989 properties = self._parse_properties() 1990 create_token = self._match_set(self.CREATABLES) and self._prev 1991 1992 if not properties or not create_token: 1993 return self._parse_as_command(start) 1994 1995 concurrently = self._match_text_seq("CONCURRENTLY") 1996 exists = self._parse_exists(not_=True) 1997 this = None 1998 expression: t.Optional[exp.Expression] = None 1999 indexes = None 2000 no_schema_binding = None 2001 begin = None 2002 end = None 2003 clone = None 2004 2005 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2006 nonlocal properties 2007 if properties and temp_props: 2008 properties.expressions.extend(temp_props.expressions) 2009 elif temp_props: 2010 properties = temp_props 2011 2012 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2013 this = self._parse_user_defined_function(kind=create_token.token_type) 2014 2015 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2016 extend_props(self._parse_properties()) 2017 2018 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2019 extend_props(self._parse_properties()) 2020 2021 if not expression: 2022 if self._match(TokenType.COMMAND): 2023 expression = self._parse_as_command(self._prev) 2024 else: 2025 begin = self._match(TokenType.BEGIN) 2026 return_ = self._match_text_seq("RETURN") 2027 2028 if self._match(TokenType.STRING, advance=False): 2029 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2030 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2031 expression = self._parse_string() 2032 extend_props(self._parse_properties()) 2033 else: 2034 expression = self._parse_user_defined_function_expression() 2035 2036 end = self._match_text_seq("END") 2037 2038 if return_: 2039 expression = self.expression(exp.Return, this=expression) 2040 elif create_token.token_type == TokenType.INDEX: 2041 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2042 if not self._match(TokenType.ON): 2043 index = self._parse_id_var() 2044 anonymous = False 2045 else: 2046 index = None 2047 anonymous = True 2048 2049 this = self._parse_index(index=index, anonymous=anonymous) 2050 elif create_token.token_type in self.DB_CREATABLES: 2051 table_parts = self._parse_table_parts( 2052 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2053 ) 2054 2055 # exp.Properties.Location.POST_NAME 2056 self._match(TokenType.COMMA) 2057 extend_props(self._parse_properties(before=True)) 2058 2059 this = self._parse_schema(this=table_parts) 2060 2061 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2062 extend_props(self._parse_properties()) 2063 2064 has_alias = self._match(TokenType.ALIAS) 2065 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2066 # exp.Properties.Location.POST_ALIAS 2067 extend_props(self._parse_properties()) 2068 2069 if create_token.token_type == TokenType.SEQUENCE: 2070 expression = self._parse_types() 2071 extend_props(self._parse_properties()) 2072 else: 2073 expression = self._parse_ddl_select() 2074 2075 # Some dialects also support using a table as an alias instead of a SELECT. 2076 # Here we fallback to this as an alternative. 2077 if not expression and has_alias: 2078 expression = self._try_parse(self._parse_table_parts) 2079 2080 if create_token.token_type == TokenType.TABLE: 2081 # exp.Properties.Location.POST_EXPRESSION 2082 extend_props(self._parse_properties()) 2083 2084 indexes = [] 2085 while True: 2086 index = self._parse_index() 2087 2088 # exp.Properties.Location.POST_INDEX 2089 extend_props(self._parse_properties()) 2090 if not index: 2091 break 2092 else: 2093 self._match(TokenType.COMMA) 2094 indexes.append(index) 2095 elif create_token.token_type == TokenType.VIEW: 2096 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2097 no_schema_binding = True 2098 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2099 extend_props(self._parse_properties()) 2100 2101 shallow = self._match_text_seq("SHALLOW") 2102 2103 if self._match_texts(self.CLONE_KEYWORDS): 2104 copy = self._prev.text.lower() == "copy" 2105 clone = self.expression( 2106 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2107 ) 2108 2109 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2110 return self._parse_as_command(start) 2111 2112 create_kind_text = create_token.text.upper() 2113 return self.expression( 2114 exp.Create, 2115 this=this, 2116 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2117 replace=replace, 2118 refresh=refresh, 2119 unique=unique, 2120 expression=expression, 2121 exists=exists, 2122 properties=properties, 2123 indexes=indexes, 2124 no_schema_binding=no_schema_binding, 2125 begin=begin, 2126 end=end, 2127 clone=clone, 2128 concurrently=concurrently, 2129 clustered=clustered, 2130 ) 2131 2132 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2133 seq = exp.SequenceProperties() 2134 2135 options = [] 2136 index = self._index 2137 2138 while self._curr: 2139 self._match(TokenType.COMMA) 2140 if self._match_text_seq("INCREMENT"): 2141 self._match_text_seq("BY") 2142 self._match_text_seq("=") 2143 seq.set("increment", self._parse_term()) 2144 elif self._match_text_seq("MINVALUE"): 2145 seq.set("minvalue", self._parse_term()) 2146 elif self._match_text_seq("MAXVALUE"): 2147 seq.set("maxvalue", self._parse_term()) 2148 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2149 self._match_text_seq("=") 2150 seq.set("start", self._parse_term()) 2151 elif self._match_text_seq("CACHE"): 2152 # T-SQL allows empty CACHE which is initialized dynamically 2153 seq.set("cache", self._parse_number() or True) 2154 elif self._match_text_seq("OWNED", "BY"): 2155 # "OWNED BY NONE" is the default 2156 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2157 else: 2158 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2159 if opt: 2160 options.append(opt) 2161 else: 2162 break 2163 2164 seq.set("options", options if options else None) 2165 return None if self._index == index else seq 2166 2167 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2168 # only used for teradata currently 2169 self._match(TokenType.COMMA) 2170 2171 kwargs = { 2172 "no": self._match_text_seq("NO"), 2173 "dual": self._match_text_seq("DUAL"), 2174 "before": self._match_text_seq("BEFORE"), 2175 "default": self._match_text_seq("DEFAULT"), 2176 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2177 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2178 "after": self._match_text_seq("AFTER"), 2179 "minimum": self._match_texts(("MIN", "MINIMUM")), 2180 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2181 } 2182 2183 if self._match_texts(self.PROPERTY_PARSERS): 2184 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2185 try: 2186 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2187 except TypeError: 2188 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2189 2190 return None 2191 2192 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2193 return self._parse_wrapped_csv(self._parse_property) 2194 2195 def _parse_property(self) -> t.Optional[exp.Expression]: 2196 if self._match_texts(self.PROPERTY_PARSERS): 2197 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2198 2199 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2200 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2201 2202 if self._match_text_seq("COMPOUND", "SORTKEY"): 2203 return self._parse_sortkey(compound=True) 2204 2205 if self._match_text_seq("SQL", "SECURITY"): 2206 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2207 2208 index = self._index 2209 key = self._parse_column() 2210 2211 if not self._match(TokenType.EQ): 2212 self._retreat(index) 2213 return self._parse_sequence_properties() 2214 2215 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2216 if isinstance(key, exp.Column): 2217 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2218 2219 value = self._parse_bitwise() or self._parse_var(any_token=True) 2220 2221 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2222 if isinstance(value, exp.Column): 2223 value = exp.var(value.name) 2224 2225 return self.expression(exp.Property, this=key, value=value) 2226 2227 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2228 if self._match_text_seq("BY"): 2229 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2230 2231 self._match(TokenType.ALIAS) 2232 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2233 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2234 2235 return self.expression( 2236 exp.FileFormatProperty, 2237 this=( 2238 self.expression( 2239 exp.InputOutputFormat, 2240 input_format=input_format, 2241 output_format=output_format, 2242 ) 2243 if input_format or output_format 2244 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2245 ), 2246 hive_format=True, 2247 ) 2248 2249 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2250 field = self._parse_field() 2251 if isinstance(field, exp.Identifier) and not field.quoted: 2252 field = exp.var(field) 2253 2254 return field 2255 2256 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2257 self._match(TokenType.EQ) 2258 self._match(TokenType.ALIAS) 2259 2260 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2261 2262 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2263 properties = [] 2264 while True: 2265 if before: 2266 prop = self._parse_property_before() 2267 else: 2268 prop = self._parse_property() 2269 if not prop: 2270 break 2271 for p in ensure_list(prop): 2272 properties.append(p) 2273 2274 if properties: 2275 return self.expression(exp.Properties, expressions=properties) 2276 2277 return None 2278 2279 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2280 return self.expression( 2281 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2282 ) 2283 2284 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2285 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2286 security_specifier = self._prev.text.upper() 2287 return self.expression(exp.SecurityProperty, this=security_specifier) 2288 return None 2289 2290 def _parse_settings_property(self) -> exp.SettingsProperty: 2291 return self.expression( 2292 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2293 ) 2294 2295 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2296 if self._index >= 2: 2297 pre_volatile_token = self._tokens[self._index - 2] 2298 else: 2299 pre_volatile_token = None 2300 2301 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2302 return exp.VolatileProperty() 2303 2304 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2305 2306 def _parse_retention_period(self) -> exp.Var: 2307 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2308 number = self._parse_number() 2309 number_str = f"{number} " if number else "" 2310 unit = self._parse_var(any_token=True) 2311 return exp.var(f"{number_str}{unit}") 2312 2313 def _parse_system_versioning_property( 2314 self, with_: bool = False 2315 ) -> exp.WithSystemVersioningProperty: 2316 self._match(TokenType.EQ) 2317 prop = self.expression( 2318 exp.WithSystemVersioningProperty, 2319 **{ # type: ignore 2320 "on": True, 2321 "with": with_, 2322 }, 2323 ) 2324 2325 if self._match_text_seq("OFF"): 2326 prop.set("on", False) 2327 return prop 2328 2329 self._match(TokenType.ON) 2330 if self._match(TokenType.L_PAREN): 2331 while self._curr and not self._match(TokenType.R_PAREN): 2332 if self._match_text_seq("HISTORY_TABLE", "="): 2333 prop.set("this", self._parse_table_parts()) 2334 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2335 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2336 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2337 prop.set("retention_period", self._parse_retention_period()) 2338 2339 self._match(TokenType.COMMA) 2340 2341 return prop 2342 2343 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2344 self._match(TokenType.EQ) 2345 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2346 prop = self.expression(exp.DataDeletionProperty, on=on) 2347 2348 if self._match(TokenType.L_PAREN): 2349 while self._curr and not self._match(TokenType.R_PAREN): 2350 if self._match_text_seq("FILTER_COLUMN", "="): 2351 prop.set("filter_column", self._parse_column()) 2352 elif self._match_text_seq("RETENTION_PERIOD", "="): 2353 prop.set("retention_period", self._parse_retention_period()) 2354 2355 self._match(TokenType.COMMA) 2356 2357 return prop 2358 2359 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2360 kind = "HASH" 2361 expressions: t.Optional[t.List[exp.Expression]] = None 2362 if self._match_text_seq("BY", "HASH"): 2363 expressions = self._parse_wrapped_csv(self._parse_id_var) 2364 elif self._match_text_seq("BY", "RANDOM"): 2365 kind = "RANDOM" 2366 2367 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2368 buckets: t.Optional[exp.Expression] = None 2369 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2370 buckets = self._parse_number() 2371 2372 return self.expression( 2373 exp.DistributedByProperty, 2374 expressions=expressions, 2375 kind=kind, 2376 buckets=buckets, 2377 order=self._parse_order(), 2378 ) 2379 2380 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2381 self._match_text_seq("KEY") 2382 expressions = self._parse_wrapped_id_vars() 2383 return self.expression(expr_type, expressions=expressions) 2384 2385 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2386 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2387 prop = self._parse_system_versioning_property(with_=True) 2388 self._match_r_paren() 2389 return prop 2390 2391 if self._match(TokenType.L_PAREN, advance=False): 2392 return self._parse_wrapped_properties() 2393 2394 if self._match_text_seq("JOURNAL"): 2395 return self._parse_withjournaltable() 2396 2397 if self._match_texts(self.VIEW_ATTRIBUTES): 2398 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2399 2400 if self._match_text_seq("DATA"): 2401 return self._parse_withdata(no=False) 2402 elif self._match_text_seq("NO", "DATA"): 2403 return self._parse_withdata(no=True) 2404 2405 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2406 return self._parse_serde_properties(with_=True) 2407 2408 if self._match(TokenType.SCHEMA): 2409 return self.expression( 2410 exp.WithSchemaBindingProperty, 2411 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2412 ) 2413 2414 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2415 return self.expression( 2416 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2417 ) 2418 2419 if not self._next: 2420 return None 2421 2422 return self._parse_withisolatedloading() 2423 2424 def _parse_procedure_option(self) -> exp.Expression | None: 2425 if self._match_text_seq("EXECUTE", "AS"): 2426 return self.expression( 2427 exp.ExecuteAsProperty, 2428 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2429 or self._parse_string(), 2430 ) 2431 2432 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2433 2434 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2435 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2436 self._match(TokenType.EQ) 2437 2438 user = self._parse_id_var() 2439 self._match(TokenType.PARAMETER) 2440 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2441 2442 if not user or not host: 2443 return None 2444 2445 return exp.DefinerProperty(this=f"{user}@{host}") 2446 2447 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2448 self._match(TokenType.TABLE) 2449 self._match(TokenType.EQ) 2450 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2451 2452 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2453 return self.expression(exp.LogProperty, no=no) 2454 2455 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2456 return self.expression(exp.JournalProperty, **kwargs) 2457 2458 def _parse_checksum(self) -> exp.ChecksumProperty: 2459 self._match(TokenType.EQ) 2460 2461 on = None 2462 if self._match(TokenType.ON): 2463 on = True 2464 elif self._match_text_seq("OFF"): 2465 on = False 2466 2467 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2468 2469 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2470 return self.expression( 2471 exp.Cluster, 2472 expressions=( 2473 self._parse_wrapped_csv(self._parse_ordered) 2474 if wrapped 2475 else self._parse_csv(self._parse_ordered) 2476 ), 2477 ) 2478 2479 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2480 self._match_text_seq("BY") 2481 2482 self._match_l_paren() 2483 expressions = self._parse_csv(self._parse_column) 2484 self._match_r_paren() 2485 2486 if self._match_text_seq("SORTED", "BY"): 2487 self._match_l_paren() 2488 sorted_by = self._parse_csv(self._parse_ordered) 2489 self._match_r_paren() 2490 else: 2491 sorted_by = None 2492 2493 self._match(TokenType.INTO) 2494 buckets = self._parse_number() 2495 self._match_text_seq("BUCKETS") 2496 2497 return self.expression( 2498 exp.ClusteredByProperty, 2499 expressions=expressions, 2500 sorted_by=sorted_by, 2501 buckets=buckets, 2502 ) 2503 2504 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2505 if not self._match_text_seq("GRANTS"): 2506 self._retreat(self._index - 1) 2507 return None 2508 2509 return self.expression(exp.CopyGrantsProperty) 2510 2511 def _parse_freespace(self) -> exp.FreespaceProperty: 2512 self._match(TokenType.EQ) 2513 return self.expression( 2514 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2515 ) 2516 2517 def _parse_mergeblockratio( 2518 self, no: bool = False, default: bool = False 2519 ) -> exp.MergeBlockRatioProperty: 2520 if self._match(TokenType.EQ): 2521 return self.expression( 2522 exp.MergeBlockRatioProperty, 2523 this=self._parse_number(), 2524 percent=self._match(TokenType.PERCENT), 2525 ) 2526 2527 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2528 2529 def _parse_datablocksize( 2530 self, 2531 default: t.Optional[bool] = None, 2532 minimum: t.Optional[bool] = None, 2533 maximum: t.Optional[bool] = None, 2534 ) -> exp.DataBlocksizeProperty: 2535 self._match(TokenType.EQ) 2536 size = self._parse_number() 2537 2538 units = None 2539 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2540 units = self._prev.text 2541 2542 return self.expression( 2543 exp.DataBlocksizeProperty, 2544 size=size, 2545 units=units, 2546 default=default, 2547 minimum=minimum, 2548 maximum=maximum, 2549 ) 2550 2551 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2552 self._match(TokenType.EQ) 2553 always = self._match_text_seq("ALWAYS") 2554 manual = self._match_text_seq("MANUAL") 2555 never = self._match_text_seq("NEVER") 2556 default = self._match_text_seq("DEFAULT") 2557 2558 autotemp = None 2559 if self._match_text_seq("AUTOTEMP"): 2560 autotemp = self._parse_schema() 2561 2562 return self.expression( 2563 exp.BlockCompressionProperty, 2564 always=always, 2565 manual=manual, 2566 never=never, 2567 default=default, 2568 autotemp=autotemp, 2569 ) 2570 2571 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2572 index = self._index 2573 no = self._match_text_seq("NO") 2574 concurrent = self._match_text_seq("CONCURRENT") 2575 2576 if not self._match_text_seq("ISOLATED", "LOADING"): 2577 self._retreat(index) 2578 return None 2579 2580 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2581 return self.expression( 2582 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2583 ) 2584 2585 def _parse_locking(self) -> exp.LockingProperty: 2586 if self._match(TokenType.TABLE): 2587 kind = "TABLE" 2588 elif self._match(TokenType.VIEW): 2589 kind = "VIEW" 2590 elif self._match(TokenType.ROW): 2591 kind = "ROW" 2592 elif self._match_text_seq("DATABASE"): 2593 kind = "DATABASE" 2594 else: 2595 kind = None 2596 2597 if kind in ("DATABASE", "TABLE", "VIEW"): 2598 this = self._parse_table_parts() 2599 else: 2600 this = None 2601 2602 if self._match(TokenType.FOR): 2603 for_or_in = "FOR" 2604 elif self._match(TokenType.IN): 2605 for_or_in = "IN" 2606 else: 2607 for_or_in = None 2608 2609 if self._match_text_seq("ACCESS"): 2610 lock_type = "ACCESS" 2611 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2612 lock_type = "EXCLUSIVE" 2613 elif self._match_text_seq("SHARE"): 2614 lock_type = "SHARE" 2615 elif self._match_text_seq("READ"): 2616 lock_type = "READ" 2617 elif self._match_text_seq("WRITE"): 2618 lock_type = "WRITE" 2619 elif self._match_text_seq("CHECKSUM"): 2620 lock_type = "CHECKSUM" 2621 else: 2622 lock_type = None 2623 2624 override = self._match_text_seq("OVERRIDE") 2625 2626 return self.expression( 2627 exp.LockingProperty, 2628 this=this, 2629 kind=kind, 2630 for_or_in=for_or_in, 2631 lock_type=lock_type, 2632 override=override, 2633 ) 2634 2635 def _parse_partition_by(self) -> t.List[exp.Expression]: 2636 if self._match(TokenType.PARTITION_BY): 2637 return self._parse_csv(self._parse_assignment) 2638 return [] 2639 2640 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2641 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2642 if self._match_text_seq("MINVALUE"): 2643 return exp.var("MINVALUE") 2644 if self._match_text_seq("MAXVALUE"): 2645 return exp.var("MAXVALUE") 2646 return self._parse_bitwise() 2647 2648 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2649 expression = None 2650 from_expressions = None 2651 to_expressions = None 2652 2653 if self._match(TokenType.IN): 2654 this = self._parse_wrapped_csv(self._parse_bitwise) 2655 elif self._match(TokenType.FROM): 2656 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2657 self._match_text_seq("TO") 2658 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2659 elif self._match_text_seq("WITH", "(", "MODULUS"): 2660 this = self._parse_number() 2661 self._match_text_seq(",", "REMAINDER") 2662 expression = self._parse_number() 2663 self._match_r_paren() 2664 else: 2665 self.raise_error("Failed to parse partition bound spec.") 2666 2667 return self.expression( 2668 exp.PartitionBoundSpec, 2669 this=this, 2670 expression=expression, 2671 from_expressions=from_expressions, 2672 to_expressions=to_expressions, 2673 ) 2674 2675 # https://www.postgresql.org/docs/current/sql-createtable.html 2676 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2677 if not self._match_text_seq("OF"): 2678 self._retreat(self._index - 1) 2679 return None 2680 2681 this = self._parse_table(schema=True) 2682 2683 if self._match(TokenType.DEFAULT): 2684 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2685 elif self._match_text_seq("FOR", "VALUES"): 2686 expression = self._parse_partition_bound_spec() 2687 else: 2688 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2689 2690 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2691 2692 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2693 self._match(TokenType.EQ) 2694 return self.expression( 2695 exp.PartitionedByProperty, 2696 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2697 ) 2698 2699 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2700 if self._match_text_seq("AND", "STATISTICS"): 2701 statistics = True 2702 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2703 statistics = False 2704 else: 2705 statistics = None 2706 2707 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2708 2709 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2710 if self._match_text_seq("SQL"): 2711 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2712 return None 2713 2714 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2715 if self._match_text_seq("SQL", "DATA"): 2716 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2717 return None 2718 2719 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2720 if self._match_text_seq("PRIMARY", "INDEX"): 2721 return exp.NoPrimaryIndexProperty() 2722 if self._match_text_seq("SQL"): 2723 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2724 return None 2725 2726 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2727 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2728 return exp.OnCommitProperty() 2729 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2730 return exp.OnCommitProperty(delete=True) 2731 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2732 2733 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2734 if self._match_text_seq("SQL", "DATA"): 2735 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2736 return None 2737 2738 def _parse_distkey(self) -> exp.DistKeyProperty: 2739 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2740 2741 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2742 table = self._parse_table(schema=True) 2743 2744 options = [] 2745 while self._match_texts(("INCLUDING", "EXCLUDING")): 2746 this = self._prev.text.upper() 2747 2748 id_var = self._parse_id_var() 2749 if not id_var: 2750 return None 2751 2752 options.append( 2753 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2754 ) 2755 2756 return self.expression(exp.LikeProperty, this=table, expressions=options) 2757 2758 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2759 return self.expression( 2760 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2761 ) 2762 2763 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2764 self._match(TokenType.EQ) 2765 return self.expression( 2766 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2767 ) 2768 2769 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2770 self._match_text_seq("WITH", "CONNECTION") 2771 return self.expression( 2772 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2773 ) 2774 2775 def _parse_returns(self) -> exp.ReturnsProperty: 2776 value: t.Optional[exp.Expression] 2777 null = None 2778 is_table = self._match(TokenType.TABLE) 2779 2780 if is_table: 2781 if self._match(TokenType.LT): 2782 value = self.expression( 2783 exp.Schema, 2784 this="TABLE", 2785 expressions=self._parse_csv(self._parse_struct_types), 2786 ) 2787 if not self._match(TokenType.GT): 2788 self.raise_error("Expecting >") 2789 else: 2790 value = self._parse_schema(exp.var("TABLE")) 2791 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2792 null = True 2793 value = None 2794 else: 2795 value = self._parse_types() 2796 2797 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2798 2799 def _parse_describe(self) -> exp.Describe: 2800 kind = self._match_set(self.CREATABLES) and self._prev.text 2801 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2802 if self._match(TokenType.DOT): 2803 style = None 2804 self._retreat(self._index - 2) 2805 2806 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2807 2808 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2809 this = self._parse_statement() 2810 else: 2811 this = self._parse_table(schema=True) 2812 2813 properties = self._parse_properties() 2814 expressions = properties.expressions if properties else None 2815 partition = self._parse_partition() 2816 return self.expression( 2817 exp.Describe, 2818 this=this, 2819 style=style, 2820 kind=kind, 2821 expressions=expressions, 2822 partition=partition, 2823 format=format, 2824 ) 2825 2826 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2827 kind = self._prev.text.upper() 2828 expressions = [] 2829 2830 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2831 if self._match(TokenType.WHEN): 2832 expression = self._parse_disjunction() 2833 self._match(TokenType.THEN) 2834 else: 2835 expression = None 2836 2837 else_ = self._match(TokenType.ELSE) 2838 2839 if not self._match(TokenType.INTO): 2840 return None 2841 2842 return self.expression( 2843 exp.ConditionalInsert, 2844 this=self.expression( 2845 exp.Insert, 2846 this=self._parse_table(schema=True), 2847 expression=self._parse_derived_table_values(), 2848 ), 2849 expression=expression, 2850 else_=else_, 2851 ) 2852 2853 expression = parse_conditional_insert() 2854 while expression is not None: 2855 expressions.append(expression) 2856 expression = parse_conditional_insert() 2857 2858 return self.expression( 2859 exp.MultitableInserts, 2860 kind=kind, 2861 comments=comments, 2862 expressions=expressions, 2863 source=self._parse_table(), 2864 ) 2865 2866 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2867 comments = [] 2868 hint = self._parse_hint() 2869 overwrite = self._match(TokenType.OVERWRITE) 2870 ignore = self._match(TokenType.IGNORE) 2871 local = self._match_text_seq("LOCAL") 2872 alternative = None 2873 is_function = None 2874 2875 if self._match_text_seq("DIRECTORY"): 2876 this: t.Optional[exp.Expression] = self.expression( 2877 exp.Directory, 2878 this=self._parse_var_or_string(), 2879 local=local, 2880 row_format=self._parse_row_format(match_row=True), 2881 ) 2882 else: 2883 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2884 comments += ensure_list(self._prev_comments) 2885 return self._parse_multitable_inserts(comments) 2886 2887 if self._match(TokenType.OR): 2888 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2889 2890 self._match(TokenType.INTO) 2891 comments += ensure_list(self._prev_comments) 2892 self._match(TokenType.TABLE) 2893 is_function = self._match(TokenType.FUNCTION) 2894 2895 this = ( 2896 self._parse_table(schema=True, parse_partition=True) 2897 if not is_function 2898 else self._parse_function() 2899 ) 2900 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2901 this.set("alias", self._parse_table_alias()) 2902 2903 returning = self._parse_returning() 2904 2905 return self.expression( 2906 exp.Insert, 2907 comments=comments, 2908 hint=hint, 2909 is_function=is_function, 2910 this=this, 2911 stored=self._match_text_seq("STORED") and self._parse_stored(), 2912 by_name=self._match_text_seq("BY", "NAME"), 2913 exists=self._parse_exists(), 2914 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2915 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2916 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2917 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2918 conflict=self._parse_on_conflict(), 2919 returning=returning or self._parse_returning(), 2920 overwrite=overwrite, 2921 alternative=alternative, 2922 ignore=ignore, 2923 source=self._match(TokenType.TABLE) and self._parse_table(), 2924 ) 2925 2926 def _parse_kill(self) -> exp.Kill: 2927 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2928 2929 return self.expression( 2930 exp.Kill, 2931 this=self._parse_primary(), 2932 kind=kind, 2933 ) 2934 2935 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2936 conflict = self._match_text_seq("ON", "CONFLICT") 2937 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2938 2939 if not conflict and not duplicate: 2940 return None 2941 2942 conflict_keys = None 2943 constraint = None 2944 2945 if conflict: 2946 if self._match_text_seq("ON", "CONSTRAINT"): 2947 constraint = self._parse_id_var() 2948 elif self._match(TokenType.L_PAREN): 2949 conflict_keys = self._parse_csv(self._parse_id_var) 2950 self._match_r_paren() 2951 2952 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2953 if self._prev.token_type == TokenType.UPDATE: 2954 self._match(TokenType.SET) 2955 expressions = self._parse_csv(self._parse_equality) 2956 else: 2957 expressions = None 2958 2959 return self.expression( 2960 exp.OnConflict, 2961 duplicate=duplicate, 2962 expressions=expressions, 2963 action=action, 2964 conflict_keys=conflict_keys, 2965 constraint=constraint, 2966 where=self._parse_where(), 2967 ) 2968 2969 def _parse_returning(self) -> t.Optional[exp.Returning]: 2970 if not self._match(TokenType.RETURNING): 2971 return None 2972 return self.expression( 2973 exp.Returning, 2974 expressions=self._parse_csv(self._parse_expression), 2975 into=self._match(TokenType.INTO) and self._parse_table_part(), 2976 ) 2977 2978 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2979 if not self._match(TokenType.FORMAT): 2980 return None 2981 return self._parse_row_format() 2982 2983 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2984 index = self._index 2985 with_ = with_ or self._match_text_seq("WITH") 2986 2987 if not self._match(TokenType.SERDE_PROPERTIES): 2988 self._retreat(index) 2989 return None 2990 return self.expression( 2991 exp.SerdeProperties, 2992 **{ # type: ignore 2993 "expressions": self._parse_wrapped_properties(), 2994 "with": with_, 2995 }, 2996 ) 2997 2998 def _parse_row_format( 2999 self, match_row: bool = False 3000 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3001 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3002 return None 3003 3004 if self._match_text_seq("SERDE"): 3005 this = self._parse_string() 3006 3007 serde_properties = self._parse_serde_properties() 3008 3009 return self.expression( 3010 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3011 ) 3012 3013 self._match_text_seq("DELIMITED") 3014 3015 kwargs = {} 3016 3017 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3018 kwargs["fields"] = self._parse_string() 3019 if self._match_text_seq("ESCAPED", "BY"): 3020 kwargs["escaped"] = self._parse_string() 3021 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3022 kwargs["collection_items"] = self._parse_string() 3023 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3024 kwargs["map_keys"] = self._parse_string() 3025 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3026 kwargs["lines"] = self._parse_string() 3027 if self._match_text_seq("NULL", "DEFINED", "AS"): 3028 kwargs["null"] = self._parse_string() 3029 3030 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3031 3032 def _parse_load(self) -> exp.LoadData | exp.Command: 3033 if self._match_text_seq("DATA"): 3034 local = self._match_text_seq("LOCAL") 3035 self._match_text_seq("INPATH") 3036 inpath = self._parse_string() 3037 overwrite = self._match(TokenType.OVERWRITE) 3038 self._match_pair(TokenType.INTO, TokenType.TABLE) 3039 3040 return self.expression( 3041 exp.LoadData, 3042 this=self._parse_table(schema=True), 3043 local=local, 3044 overwrite=overwrite, 3045 inpath=inpath, 3046 partition=self._parse_partition(), 3047 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3048 serde=self._match_text_seq("SERDE") and self._parse_string(), 3049 ) 3050 return self._parse_as_command(self._prev) 3051 3052 def _parse_delete(self) -> exp.Delete: 3053 # This handles MySQL's "Multiple-Table Syntax" 3054 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3055 tables = None 3056 if not self._match(TokenType.FROM, advance=False): 3057 tables = self._parse_csv(self._parse_table) or None 3058 3059 returning = self._parse_returning() 3060 3061 return self.expression( 3062 exp.Delete, 3063 tables=tables, 3064 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3065 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3066 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3067 where=self._parse_where(), 3068 returning=returning or self._parse_returning(), 3069 limit=self._parse_limit(), 3070 ) 3071 3072 def _parse_update(self) -> exp.Update: 3073 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3074 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3075 returning = self._parse_returning() 3076 return self.expression( 3077 exp.Update, 3078 **{ # type: ignore 3079 "this": this, 3080 "expressions": expressions, 3081 "from": self._parse_from(joins=True), 3082 "where": self._parse_where(), 3083 "returning": returning or self._parse_returning(), 3084 "order": self._parse_order(), 3085 "limit": self._parse_limit(), 3086 }, 3087 ) 3088 3089 def _parse_use(self) -> exp.Use: 3090 return self.expression( 3091 exp.Use, 3092 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3093 this=self._parse_table(schema=False), 3094 ) 3095 3096 def _parse_uncache(self) -> exp.Uncache: 3097 if not self._match(TokenType.TABLE): 3098 self.raise_error("Expecting TABLE after UNCACHE") 3099 3100 return self.expression( 3101 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3102 ) 3103 3104 def _parse_cache(self) -> exp.Cache: 3105 lazy = self._match_text_seq("LAZY") 3106 self._match(TokenType.TABLE) 3107 table = self._parse_table(schema=True) 3108 3109 options = [] 3110 if self._match_text_seq("OPTIONS"): 3111 self._match_l_paren() 3112 k = self._parse_string() 3113 self._match(TokenType.EQ) 3114 v = self._parse_string() 3115 options = [k, v] 3116 self._match_r_paren() 3117 3118 self._match(TokenType.ALIAS) 3119 return self.expression( 3120 exp.Cache, 3121 this=table, 3122 lazy=lazy, 3123 options=options, 3124 expression=self._parse_select(nested=True), 3125 ) 3126 3127 def _parse_partition(self) -> t.Optional[exp.Partition]: 3128 if not self._match_texts(self.PARTITION_KEYWORDS): 3129 return None 3130 3131 return self.expression( 3132 exp.Partition, 3133 subpartition=self._prev.text.upper() == "SUBPARTITION", 3134 expressions=self._parse_wrapped_csv(self._parse_assignment), 3135 ) 3136 3137 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3138 def _parse_value_expression() -> t.Optional[exp.Expression]: 3139 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3140 return exp.var(self._prev.text.upper()) 3141 return self._parse_expression() 3142 3143 if self._match(TokenType.L_PAREN): 3144 expressions = self._parse_csv(_parse_value_expression) 3145 self._match_r_paren() 3146 return self.expression(exp.Tuple, expressions=expressions) 3147 3148 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3149 expression = self._parse_expression() 3150 if expression: 3151 return self.expression(exp.Tuple, expressions=[expression]) 3152 return None 3153 3154 def _parse_projections(self) -> t.List[exp.Expression]: 3155 return self._parse_expressions() 3156 3157 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3158 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3159 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3160 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3161 ) 3162 elif self._match(TokenType.FROM): 3163 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3164 # Support parentheses for duckdb FROM-first syntax 3165 select = self._parse_select() 3166 if select: 3167 select.set("from", from_) 3168 this = select 3169 else: 3170 this = exp.select("*").from_(t.cast(exp.From, from_)) 3171 else: 3172 this = ( 3173 self._parse_table(consume_pipe=True) 3174 if table 3175 else self._parse_select(nested=True, parse_set_operation=False) 3176 ) 3177 3178 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3179 # in case a modifier (e.g. join) is following 3180 if table and isinstance(this, exp.Values) and this.alias: 3181 alias = this.args["alias"].pop() 3182 this = exp.Table(this=this, alias=alias) 3183 3184 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3185 3186 return this 3187 3188 def _parse_select( 3189 self, 3190 nested: bool = False, 3191 table: bool = False, 3192 parse_subquery_alias: bool = True, 3193 parse_set_operation: bool = True, 3194 consume_pipe: bool = True, 3195 ) -> t.Optional[exp.Expression]: 3196 query = self._parse_select_query( 3197 nested=nested, 3198 table=table, 3199 parse_subquery_alias=parse_subquery_alias, 3200 parse_set_operation=parse_set_operation, 3201 ) 3202 3203 if ( 3204 consume_pipe 3205 and self._match(TokenType.PIPE_GT, advance=False) 3206 and isinstance(query, exp.Query) 3207 ): 3208 query = self._parse_pipe_syntax_query(query) 3209 query = query.subquery(copy=False) if query and table else query 3210 3211 return query 3212 3213 def _parse_select_query( 3214 self, 3215 nested: bool = False, 3216 table: bool = False, 3217 parse_subquery_alias: bool = True, 3218 parse_set_operation: bool = True, 3219 ) -> t.Optional[exp.Expression]: 3220 cte = self._parse_with() 3221 3222 if cte: 3223 this = self._parse_statement() 3224 3225 if not this: 3226 self.raise_error("Failed to parse any statement following CTE") 3227 return cte 3228 3229 if "with" in this.arg_types: 3230 this.set("with", cte) 3231 else: 3232 self.raise_error(f"{this.key} does not support CTE") 3233 this = cte 3234 3235 return this 3236 3237 # duckdb supports leading with FROM x 3238 from_ = ( 3239 self._parse_from(consume_pipe=True) 3240 if self._match(TokenType.FROM, advance=False) 3241 else None 3242 ) 3243 3244 if self._match(TokenType.SELECT): 3245 comments = self._prev_comments 3246 3247 hint = self._parse_hint() 3248 3249 if self._next and not self._next.token_type == TokenType.DOT: 3250 all_ = self._match(TokenType.ALL) 3251 distinct = self._match_set(self.DISTINCT_TOKENS) 3252 else: 3253 all_, distinct = None, None 3254 3255 kind = ( 3256 self._match(TokenType.ALIAS) 3257 and self._match_texts(("STRUCT", "VALUE")) 3258 and self._prev.text.upper() 3259 ) 3260 3261 if distinct: 3262 distinct = self.expression( 3263 exp.Distinct, 3264 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3265 ) 3266 3267 if all_ and distinct: 3268 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3269 3270 operation_modifiers = [] 3271 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3272 operation_modifiers.append(exp.var(self._prev.text.upper())) 3273 3274 limit = self._parse_limit(top=True) 3275 projections = self._parse_projections() 3276 3277 this = self.expression( 3278 exp.Select, 3279 kind=kind, 3280 hint=hint, 3281 distinct=distinct, 3282 expressions=projections, 3283 limit=limit, 3284 operation_modifiers=operation_modifiers or None, 3285 ) 3286 this.comments = comments 3287 3288 into = self._parse_into() 3289 if into: 3290 this.set("into", into) 3291 3292 if not from_: 3293 from_ = self._parse_from() 3294 3295 if from_: 3296 this.set("from", from_) 3297 3298 this = self._parse_query_modifiers(this) 3299 elif (table or nested) and self._match(TokenType.L_PAREN): 3300 this = self._parse_wrapped_select(table=table) 3301 3302 # We return early here so that the UNION isn't attached to the subquery by the 3303 # following call to _parse_set_operations, but instead becomes the parent node 3304 self._match_r_paren() 3305 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3306 elif self._match(TokenType.VALUES, advance=False): 3307 this = self._parse_derived_table_values() 3308 elif from_: 3309 this = exp.select("*").from_(from_.this, copy=False) 3310 elif self._match(TokenType.SUMMARIZE): 3311 table = self._match(TokenType.TABLE) 3312 this = self._parse_select() or self._parse_string() or self._parse_table() 3313 return self.expression(exp.Summarize, this=this, table=table) 3314 elif self._match(TokenType.DESCRIBE): 3315 this = self._parse_describe() 3316 elif self._match_text_seq("STREAM"): 3317 this = self._parse_function() 3318 if this: 3319 this = self.expression(exp.Stream, this=this) 3320 else: 3321 self._retreat(self._index - 1) 3322 else: 3323 this = None 3324 3325 return self._parse_set_operations(this) if parse_set_operation else this 3326 3327 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3328 self._match_text_seq("SEARCH") 3329 3330 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3331 3332 if not kind: 3333 return None 3334 3335 self._match_text_seq("FIRST", "BY") 3336 3337 return self.expression( 3338 exp.RecursiveWithSearch, 3339 kind=kind, 3340 this=self._parse_id_var(), 3341 expression=self._match_text_seq("SET") and self._parse_id_var(), 3342 using=self._match_text_seq("USING") and self._parse_id_var(), 3343 ) 3344 3345 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3346 if not skip_with_token and not self._match(TokenType.WITH): 3347 return None 3348 3349 comments = self._prev_comments 3350 recursive = self._match(TokenType.RECURSIVE) 3351 3352 last_comments = None 3353 expressions = [] 3354 while True: 3355 cte = self._parse_cte() 3356 if isinstance(cte, exp.CTE): 3357 expressions.append(cte) 3358 if last_comments: 3359 cte.add_comments(last_comments) 3360 3361 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3362 break 3363 else: 3364 self._match(TokenType.WITH) 3365 3366 last_comments = self._prev_comments 3367 3368 return self.expression( 3369 exp.With, 3370 comments=comments, 3371 expressions=expressions, 3372 recursive=recursive, 3373 search=self._parse_recursive_with_search(), 3374 ) 3375 3376 def _parse_cte(self) -> t.Optional[exp.CTE]: 3377 index = self._index 3378 3379 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3380 if not alias or not alias.this: 3381 self.raise_error("Expected CTE to have alias") 3382 3383 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3384 self._retreat(index) 3385 return None 3386 3387 comments = self._prev_comments 3388 3389 if self._match_text_seq("NOT", "MATERIALIZED"): 3390 materialized = False 3391 elif self._match_text_seq("MATERIALIZED"): 3392 materialized = True 3393 else: 3394 materialized = None 3395 3396 cte = self.expression( 3397 exp.CTE, 3398 this=self._parse_wrapped(self._parse_statement), 3399 alias=alias, 3400 materialized=materialized, 3401 comments=comments, 3402 ) 3403 3404 values = cte.this 3405 if isinstance(values, exp.Values): 3406 if values.alias: 3407 cte.set("this", exp.select("*").from_(values)) 3408 else: 3409 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3410 3411 return cte 3412 3413 def _parse_table_alias( 3414 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3415 ) -> t.Optional[exp.TableAlias]: 3416 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3417 # so this section tries to parse the clause version and if it fails, it treats the token 3418 # as an identifier (alias) 3419 if self._can_parse_limit_or_offset(): 3420 return None 3421 3422 any_token = self._match(TokenType.ALIAS) 3423 alias = ( 3424 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3425 or self._parse_string_as_identifier() 3426 ) 3427 3428 index = self._index 3429 if self._match(TokenType.L_PAREN): 3430 columns = self._parse_csv(self._parse_function_parameter) 3431 self._match_r_paren() if columns else self._retreat(index) 3432 else: 3433 columns = None 3434 3435 if not alias and not columns: 3436 return None 3437 3438 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3439 3440 # We bubble up comments from the Identifier to the TableAlias 3441 if isinstance(alias, exp.Identifier): 3442 table_alias.add_comments(alias.pop_comments()) 3443 3444 return table_alias 3445 3446 def _parse_subquery( 3447 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3448 ) -> t.Optional[exp.Subquery]: 3449 if not this: 3450 return None 3451 3452 return self.expression( 3453 exp.Subquery, 3454 this=this, 3455 pivots=self._parse_pivots(), 3456 alias=self._parse_table_alias() if parse_alias else None, 3457 sample=self._parse_table_sample(), 3458 ) 3459 3460 def _implicit_unnests_to_explicit(self, this: E) -> E: 3461 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3462 3463 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3464 for i, join in enumerate(this.args.get("joins") or []): 3465 table = join.this 3466 normalized_table = table.copy() 3467 normalized_table.meta["maybe_column"] = True 3468 normalized_table = _norm(normalized_table, dialect=self.dialect) 3469 3470 if isinstance(table, exp.Table) and not join.args.get("on"): 3471 if normalized_table.parts[0].name in refs: 3472 table_as_column = table.to_column() 3473 unnest = exp.Unnest(expressions=[table_as_column]) 3474 3475 # Table.to_column creates a parent Alias node that we want to convert to 3476 # a TableAlias and attach to the Unnest, so it matches the parser's output 3477 if isinstance(table.args.get("alias"), exp.TableAlias): 3478 table_as_column.replace(table_as_column.this) 3479 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3480 3481 table.replace(unnest) 3482 3483 refs.add(normalized_table.alias_or_name) 3484 3485 return this 3486 3487 def _parse_query_modifiers( 3488 self, this: t.Optional[exp.Expression] 3489 ) -> t.Optional[exp.Expression]: 3490 if isinstance(this, self.MODIFIABLES): 3491 for join in self._parse_joins(): 3492 this.append("joins", join) 3493 for lateral in iter(self._parse_lateral, None): 3494 this.append("laterals", lateral) 3495 3496 while True: 3497 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3498 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3499 key, expression = parser(self) 3500 3501 if expression: 3502 this.set(key, expression) 3503 if key == "limit": 3504 offset = expression.args.pop("offset", None) 3505 3506 if offset: 3507 offset = exp.Offset(expression=offset) 3508 this.set("offset", offset) 3509 3510 limit_by_expressions = expression.expressions 3511 expression.set("expressions", None) 3512 offset.set("expressions", limit_by_expressions) 3513 continue 3514 break 3515 3516 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3517 this = self._implicit_unnests_to_explicit(this) 3518 3519 return this 3520 3521 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3522 start = self._curr 3523 while self._curr: 3524 self._advance() 3525 3526 end = self._tokens[self._index - 1] 3527 return exp.Hint(expressions=[self._find_sql(start, end)]) 3528 3529 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3530 return self._parse_function_call() 3531 3532 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3533 start_index = self._index 3534 should_fallback_to_string = False 3535 3536 hints = [] 3537 try: 3538 for hint in iter( 3539 lambda: self._parse_csv( 3540 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3541 ), 3542 [], 3543 ): 3544 hints.extend(hint) 3545 except ParseError: 3546 should_fallback_to_string = True 3547 3548 if should_fallback_to_string or self._curr: 3549 self._retreat(start_index) 3550 return self._parse_hint_fallback_to_string() 3551 3552 return self.expression(exp.Hint, expressions=hints) 3553 3554 def _parse_hint(self) -> t.Optional[exp.Hint]: 3555 if self._match(TokenType.HINT) and self._prev_comments: 3556 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3557 3558 return None 3559 3560 def _parse_into(self) -> t.Optional[exp.Into]: 3561 if not self._match(TokenType.INTO): 3562 return None 3563 3564 temp = self._match(TokenType.TEMPORARY) 3565 unlogged = self._match_text_seq("UNLOGGED") 3566 self._match(TokenType.TABLE) 3567 3568 return self.expression( 3569 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3570 ) 3571 3572 def _parse_from( 3573 self, 3574 joins: bool = False, 3575 skip_from_token: bool = False, 3576 consume_pipe: bool = False, 3577 ) -> t.Optional[exp.From]: 3578 if not skip_from_token and not self._match(TokenType.FROM): 3579 return None 3580 3581 return self.expression( 3582 exp.From, 3583 comments=self._prev_comments, 3584 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3585 ) 3586 3587 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3588 return self.expression( 3589 exp.MatchRecognizeMeasure, 3590 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3591 this=self._parse_expression(), 3592 ) 3593 3594 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3595 if not self._match(TokenType.MATCH_RECOGNIZE): 3596 return None 3597 3598 self._match_l_paren() 3599 3600 partition = self._parse_partition_by() 3601 order = self._parse_order() 3602 3603 measures = ( 3604 self._parse_csv(self._parse_match_recognize_measure) 3605 if self._match_text_seq("MEASURES") 3606 else None 3607 ) 3608 3609 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3610 rows = exp.var("ONE ROW PER MATCH") 3611 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3612 text = "ALL ROWS PER MATCH" 3613 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3614 text += " SHOW EMPTY MATCHES" 3615 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3616 text += " OMIT EMPTY MATCHES" 3617 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3618 text += " WITH UNMATCHED ROWS" 3619 rows = exp.var(text) 3620 else: 3621 rows = None 3622 3623 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3624 text = "AFTER MATCH SKIP" 3625 if self._match_text_seq("PAST", "LAST", "ROW"): 3626 text += " PAST LAST ROW" 3627 elif self._match_text_seq("TO", "NEXT", "ROW"): 3628 text += " TO NEXT ROW" 3629 elif self._match_text_seq("TO", "FIRST"): 3630 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3631 elif self._match_text_seq("TO", "LAST"): 3632 text += f" TO LAST {self._advance_any().text}" # type: ignore 3633 after = exp.var(text) 3634 else: 3635 after = None 3636 3637 if self._match_text_seq("PATTERN"): 3638 self._match_l_paren() 3639 3640 if not self._curr: 3641 self.raise_error("Expecting )", self._curr) 3642 3643 paren = 1 3644 start = self._curr 3645 3646 while self._curr and paren > 0: 3647 if self._curr.token_type == TokenType.L_PAREN: 3648 paren += 1 3649 if self._curr.token_type == TokenType.R_PAREN: 3650 paren -= 1 3651 3652 end = self._prev 3653 self._advance() 3654 3655 if paren > 0: 3656 self.raise_error("Expecting )", self._curr) 3657 3658 pattern = exp.var(self._find_sql(start, end)) 3659 else: 3660 pattern = None 3661 3662 define = ( 3663 self._parse_csv(self._parse_name_as_expression) 3664 if self._match_text_seq("DEFINE") 3665 else None 3666 ) 3667 3668 self._match_r_paren() 3669 3670 return self.expression( 3671 exp.MatchRecognize, 3672 partition_by=partition, 3673 order=order, 3674 measures=measures, 3675 rows=rows, 3676 after=after, 3677 pattern=pattern, 3678 define=define, 3679 alias=self._parse_table_alias(), 3680 ) 3681 3682 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3683 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3684 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3685 cross_apply = False 3686 3687 if cross_apply is not None: 3688 this = self._parse_select(table=True) 3689 view = None 3690 outer = None 3691 elif self._match(TokenType.LATERAL): 3692 this = self._parse_select(table=True) 3693 view = self._match(TokenType.VIEW) 3694 outer = self._match(TokenType.OUTER) 3695 else: 3696 return None 3697 3698 if not this: 3699 this = ( 3700 self._parse_unnest() 3701 or self._parse_function() 3702 or self._parse_id_var(any_token=False) 3703 ) 3704 3705 while self._match(TokenType.DOT): 3706 this = exp.Dot( 3707 this=this, 3708 expression=self._parse_function() or self._parse_id_var(any_token=False), 3709 ) 3710 3711 ordinality: t.Optional[bool] = None 3712 3713 if view: 3714 table = self._parse_id_var(any_token=False) 3715 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3716 table_alias: t.Optional[exp.TableAlias] = self.expression( 3717 exp.TableAlias, this=table, columns=columns 3718 ) 3719 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3720 # We move the alias from the lateral's child node to the lateral itself 3721 table_alias = this.args["alias"].pop() 3722 else: 3723 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3724 table_alias = self._parse_table_alias() 3725 3726 return self.expression( 3727 exp.Lateral, 3728 this=this, 3729 view=view, 3730 outer=outer, 3731 alias=table_alias, 3732 cross_apply=cross_apply, 3733 ordinality=ordinality, 3734 ) 3735 3736 def _parse_join_parts( 3737 self, 3738 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3739 return ( 3740 self._match_set(self.JOIN_METHODS) and self._prev, 3741 self._match_set(self.JOIN_SIDES) and self._prev, 3742 self._match_set(self.JOIN_KINDS) and self._prev, 3743 ) 3744 3745 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3746 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3747 this = self._parse_column() 3748 if isinstance(this, exp.Column): 3749 return this.this 3750 return this 3751 3752 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3753 3754 def _parse_join( 3755 self, skip_join_token: bool = False, parse_bracket: bool = False 3756 ) -> t.Optional[exp.Join]: 3757 if self._match(TokenType.COMMA): 3758 table = self._try_parse(self._parse_table) 3759 cross_join = self.expression(exp.Join, this=table) if table else None 3760 3761 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3762 cross_join.set("kind", "CROSS") 3763 3764 return cross_join 3765 3766 index = self._index 3767 method, side, kind = self._parse_join_parts() 3768 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3769 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3770 join_comments = self._prev_comments 3771 3772 if not skip_join_token and not join: 3773 self._retreat(index) 3774 kind = None 3775 method = None 3776 side = None 3777 3778 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3779 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3780 3781 if not skip_join_token and not join and not outer_apply and not cross_apply: 3782 return None 3783 3784 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3785 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3786 kwargs["expressions"] = self._parse_csv( 3787 lambda: self._parse_table(parse_bracket=parse_bracket) 3788 ) 3789 3790 if method: 3791 kwargs["method"] = method.text 3792 if side: 3793 kwargs["side"] = side.text 3794 if kind: 3795 kwargs["kind"] = kind.text 3796 if hint: 3797 kwargs["hint"] = hint 3798 3799 if self._match(TokenType.MATCH_CONDITION): 3800 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3801 3802 if self._match(TokenType.ON): 3803 kwargs["on"] = self._parse_assignment() 3804 elif self._match(TokenType.USING): 3805 kwargs["using"] = self._parse_using_identifiers() 3806 elif ( 3807 not (outer_apply or cross_apply) 3808 and not isinstance(kwargs["this"], exp.Unnest) 3809 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3810 ): 3811 index = self._index 3812 joins: t.Optional[list] = list(self._parse_joins()) 3813 3814 if joins and self._match(TokenType.ON): 3815 kwargs["on"] = self._parse_assignment() 3816 elif joins and self._match(TokenType.USING): 3817 kwargs["using"] = self._parse_using_identifiers() 3818 else: 3819 joins = None 3820 self._retreat(index) 3821 3822 kwargs["this"].set("joins", joins if joins else None) 3823 3824 kwargs["pivots"] = self._parse_pivots() 3825 3826 comments = [c for token in (method, side, kind) if token for c in token.comments] 3827 comments = (join_comments or []) + comments 3828 return self.expression(exp.Join, comments=comments, **kwargs) 3829 3830 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3831 this = self._parse_assignment() 3832 3833 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3834 return this 3835 3836 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3837 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3838 3839 return this 3840 3841 def _parse_index_params(self) -> exp.IndexParameters: 3842 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3843 3844 if self._match(TokenType.L_PAREN, advance=False): 3845 columns = self._parse_wrapped_csv(self._parse_with_operator) 3846 else: 3847 columns = None 3848 3849 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3850 partition_by = self._parse_partition_by() 3851 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3852 tablespace = ( 3853 self._parse_var(any_token=True) 3854 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3855 else None 3856 ) 3857 where = self._parse_where() 3858 3859 on = self._parse_field() if self._match(TokenType.ON) else None 3860 3861 return self.expression( 3862 exp.IndexParameters, 3863 using=using, 3864 columns=columns, 3865 include=include, 3866 partition_by=partition_by, 3867 where=where, 3868 with_storage=with_storage, 3869 tablespace=tablespace, 3870 on=on, 3871 ) 3872 3873 def _parse_index( 3874 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3875 ) -> t.Optional[exp.Index]: 3876 if index or anonymous: 3877 unique = None 3878 primary = None 3879 amp = None 3880 3881 self._match(TokenType.ON) 3882 self._match(TokenType.TABLE) # hive 3883 table = self._parse_table_parts(schema=True) 3884 else: 3885 unique = self._match(TokenType.UNIQUE) 3886 primary = self._match_text_seq("PRIMARY") 3887 amp = self._match_text_seq("AMP") 3888 3889 if not self._match(TokenType.INDEX): 3890 return None 3891 3892 index = self._parse_id_var() 3893 table = None 3894 3895 params = self._parse_index_params() 3896 3897 return self.expression( 3898 exp.Index, 3899 this=index, 3900 table=table, 3901 unique=unique, 3902 primary=primary, 3903 amp=amp, 3904 params=params, 3905 ) 3906 3907 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3908 hints: t.List[exp.Expression] = [] 3909 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3910 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3911 hints.append( 3912 self.expression( 3913 exp.WithTableHint, 3914 expressions=self._parse_csv( 3915 lambda: self._parse_function() or self._parse_var(any_token=True) 3916 ), 3917 ) 3918 ) 3919 self._match_r_paren() 3920 else: 3921 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3922 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3923 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3924 3925 self._match_set((TokenType.INDEX, TokenType.KEY)) 3926 if self._match(TokenType.FOR): 3927 hint.set("target", self._advance_any() and self._prev.text.upper()) 3928 3929 hint.set("expressions", self._parse_wrapped_id_vars()) 3930 hints.append(hint) 3931 3932 return hints or None 3933 3934 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3935 return ( 3936 (not schema and self._parse_function(optional_parens=False)) 3937 or self._parse_id_var(any_token=False) 3938 or self._parse_string_as_identifier() 3939 or self._parse_placeholder() 3940 ) 3941 3942 def _parse_table_parts( 3943 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3944 ) -> exp.Table: 3945 catalog = None 3946 db = None 3947 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3948 3949 while self._match(TokenType.DOT): 3950 if catalog: 3951 # This allows nesting the table in arbitrarily many dot expressions if needed 3952 table = self.expression( 3953 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3954 ) 3955 else: 3956 catalog = db 3957 db = table 3958 # "" used for tsql FROM a..b case 3959 table = self._parse_table_part(schema=schema) or "" 3960 3961 if ( 3962 wildcard 3963 and self._is_connected() 3964 and (isinstance(table, exp.Identifier) or not table) 3965 and self._match(TokenType.STAR) 3966 ): 3967 if isinstance(table, exp.Identifier): 3968 table.args["this"] += "*" 3969 else: 3970 table = exp.Identifier(this="*") 3971 3972 # We bubble up comments from the Identifier to the Table 3973 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3974 3975 if is_db_reference: 3976 catalog = db 3977 db = table 3978 table = None 3979 3980 if not table and not is_db_reference: 3981 self.raise_error(f"Expected table name but got {self._curr}") 3982 if not db and is_db_reference: 3983 self.raise_error(f"Expected database name but got {self._curr}") 3984 3985 table = self.expression( 3986 exp.Table, 3987 comments=comments, 3988 this=table, 3989 db=db, 3990 catalog=catalog, 3991 ) 3992 3993 changes = self._parse_changes() 3994 if changes: 3995 table.set("changes", changes) 3996 3997 at_before = self._parse_historical_data() 3998 if at_before: 3999 table.set("when", at_before) 4000 4001 pivots = self._parse_pivots() 4002 if pivots: 4003 table.set("pivots", pivots) 4004 4005 return table 4006 4007 def _parse_table( 4008 self, 4009 schema: bool = False, 4010 joins: bool = False, 4011 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4012 parse_bracket: bool = False, 4013 is_db_reference: bool = False, 4014 parse_partition: bool = False, 4015 consume_pipe: bool = False, 4016 ) -> t.Optional[exp.Expression]: 4017 lateral = self._parse_lateral() 4018 if lateral: 4019 return lateral 4020 4021 unnest = self._parse_unnest() 4022 if unnest: 4023 return unnest 4024 4025 values = self._parse_derived_table_values() 4026 if values: 4027 return values 4028 4029 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4030 if subquery: 4031 if not subquery.args.get("pivots"): 4032 subquery.set("pivots", self._parse_pivots()) 4033 return subquery 4034 4035 bracket = parse_bracket and self._parse_bracket(None) 4036 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4037 4038 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4039 self._parse_table 4040 ) 4041 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4042 4043 only = self._match(TokenType.ONLY) 4044 4045 this = t.cast( 4046 exp.Expression, 4047 bracket 4048 or rows_from 4049 or self._parse_bracket( 4050 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4051 ), 4052 ) 4053 4054 if only: 4055 this.set("only", only) 4056 4057 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4058 self._match_text_seq("*") 4059 4060 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4061 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4062 this.set("partition", self._parse_partition()) 4063 4064 if schema: 4065 return self._parse_schema(this=this) 4066 4067 version = self._parse_version() 4068 4069 if version: 4070 this.set("version", version) 4071 4072 if self.dialect.ALIAS_POST_TABLESAMPLE: 4073 this.set("sample", self._parse_table_sample()) 4074 4075 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4076 if alias: 4077 this.set("alias", alias) 4078 4079 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4080 return self.expression( 4081 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4082 ) 4083 4084 this.set("hints", self._parse_table_hints()) 4085 4086 if not this.args.get("pivots"): 4087 this.set("pivots", self._parse_pivots()) 4088 4089 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4090 this.set("sample", self._parse_table_sample()) 4091 4092 if joins: 4093 for join in self._parse_joins(): 4094 this.append("joins", join) 4095 4096 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4097 this.set("ordinality", True) 4098 this.set("alias", self._parse_table_alias()) 4099 4100 return this 4101 4102 def _parse_version(self) -> t.Optional[exp.Version]: 4103 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4104 this = "TIMESTAMP" 4105 elif self._match(TokenType.VERSION_SNAPSHOT): 4106 this = "VERSION" 4107 else: 4108 return None 4109 4110 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4111 kind = self._prev.text.upper() 4112 start = self._parse_bitwise() 4113 self._match_texts(("TO", "AND")) 4114 end = self._parse_bitwise() 4115 expression: t.Optional[exp.Expression] = self.expression( 4116 exp.Tuple, expressions=[start, end] 4117 ) 4118 elif self._match_text_seq("CONTAINED", "IN"): 4119 kind = "CONTAINED IN" 4120 expression = self.expression( 4121 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4122 ) 4123 elif self._match(TokenType.ALL): 4124 kind = "ALL" 4125 expression = None 4126 else: 4127 self._match_text_seq("AS", "OF") 4128 kind = "AS OF" 4129 expression = self._parse_type() 4130 4131 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4132 4133 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4134 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4135 index = self._index 4136 historical_data = None 4137 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4138 this = self._prev.text.upper() 4139 kind = ( 4140 self._match(TokenType.L_PAREN) 4141 and self._match_texts(self.HISTORICAL_DATA_KIND) 4142 and self._prev.text.upper() 4143 ) 4144 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4145 4146 if expression: 4147 self._match_r_paren() 4148 historical_data = self.expression( 4149 exp.HistoricalData, this=this, kind=kind, expression=expression 4150 ) 4151 else: 4152 self._retreat(index) 4153 4154 return historical_data 4155 4156 def _parse_changes(self) -> t.Optional[exp.Changes]: 4157 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4158 return None 4159 4160 information = self._parse_var(any_token=True) 4161 self._match_r_paren() 4162 4163 return self.expression( 4164 exp.Changes, 4165 information=information, 4166 at_before=self._parse_historical_data(), 4167 end=self._parse_historical_data(), 4168 ) 4169 4170 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4171 if not self._match(TokenType.UNNEST): 4172 return None 4173 4174 expressions = self._parse_wrapped_csv(self._parse_equality) 4175 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4176 4177 alias = self._parse_table_alias() if with_alias else None 4178 4179 if alias: 4180 if self.dialect.UNNEST_COLUMN_ONLY: 4181 if alias.args.get("columns"): 4182 self.raise_error("Unexpected extra column alias in unnest.") 4183 4184 alias.set("columns", [alias.this]) 4185 alias.set("this", None) 4186 4187 columns = alias.args.get("columns") or [] 4188 if offset and len(expressions) < len(columns): 4189 offset = columns.pop() 4190 4191 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4192 self._match(TokenType.ALIAS) 4193 offset = self._parse_id_var( 4194 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4195 ) or exp.to_identifier("offset") 4196 4197 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4198 4199 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4200 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4201 if not is_derived and not ( 4202 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4203 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4204 ): 4205 return None 4206 4207 expressions = self._parse_csv(self._parse_value) 4208 alias = self._parse_table_alias() 4209 4210 if is_derived: 4211 self._match_r_paren() 4212 4213 return self.expression( 4214 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4215 ) 4216 4217 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4218 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4219 as_modifier and self._match_text_seq("USING", "SAMPLE") 4220 ): 4221 return None 4222 4223 bucket_numerator = None 4224 bucket_denominator = None 4225 bucket_field = None 4226 percent = None 4227 size = None 4228 seed = None 4229 4230 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4231 matched_l_paren = self._match(TokenType.L_PAREN) 4232 4233 if self.TABLESAMPLE_CSV: 4234 num = None 4235 expressions = self._parse_csv(self._parse_primary) 4236 else: 4237 expressions = None 4238 num = ( 4239 self._parse_factor() 4240 if self._match(TokenType.NUMBER, advance=False) 4241 else self._parse_primary() or self._parse_placeholder() 4242 ) 4243 4244 if self._match_text_seq("BUCKET"): 4245 bucket_numerator = self._parse_number() 4246 self._match_text_seq("OUT", "OF") 4247 bucket_denominator = bucket_denominator = self._parse_number() 4248 self._match(TokenType.ON) 4249 bucket_field = self._parse_field() 4250 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4251 percent = num 4252 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4253 size = num 4254 else: 4255 percent = num 4256 4257 if matched_l_paren: 4258 self._match_r_paren() 4259 4260 if self._match(TokenType.L_PAREN): 4261 method = self._parse_var(upper=True) 4262 seed = self._match(TokenType.COMMA) and self._parse_number() 4263 self._match_r_paren() 4264 elif self._match_texts(("SEED", "REPEATABLE")): 4265 seed = self._parse_wrapped(self._parse_number) 4266 4267 if not method and self.DEFAULT_SAMPLING_METHOD: 4268 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4269 4270 return self.expression( 4271 exp.TableSample, 4272 expressions=expressions, 4273 method=method, 4274 bucket_numerator=bucket_numerator, 4275 bucket_denominator=bucket_denominator, 4276 bucket_field=bucket_field, 4277 percent=percent, 4278 size=size, 4279 seed=seed, 4280 ) 4281 4282 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4283 return list(iter(self._parse_pivot, None)) or None 4284 4285 def _parse_joins(self) -> t.Iterator[exp.Join]: 4286 return iter(self._parse_join, None) 4287 4288 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4289 if not self._match(TokenType.INTO): 4290 return None 4291 4292 return self.expression( 4293 exp.UnpivotColumns, 4294 this=self._match_text_seq("NAME") and self._parse_column(), 4295 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4296 ) 4297 4298 # https://duckdb.org/docs/sql/statements/pivot 4299 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4300 def _parse_on() -> t.Optional[exp.Expression]: 4301 this = self._parse_bitwise() 4302 4303 if self._match(TokenType.IN): 4304 # PIVOT ... ON col IN (row_val1, row_val2) 4305 return self._parse_in(this) 4306 if self._match(TokenType.ALIAS, advance=False): 4307 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4308 return self._parse_alias(this) 4309 4310 return this 4311 4312 this = self._parse_table() 4313 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4314 into = self._parse_unpivot_columns() 4315 using = self._match(TokenType.USING) and self._parse_csv( 4316 lambda: self._parse_alias(self._parse_function()) 4317 ) 4318 group = self._parse_group() 4319 4320 return self.expression( 4321 exp.Pivot, 4322 this=this, 4323 expressions=expressions, 4324 using=using, 4325 group=group, 4326 unpivot=is_unpivot, 4327 into=into, 4328 ) 4329 4330 def _parse_pivot_in(self) -> exp.In: 4331 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4332 this = self._parse_select_or_expression() 4333 4334 self._match(TokenType.ALIAS) 4335 alias = self._parse_bitwise() 4336 if alias: 4337 if isinstance(alias, exp.Column) and not alias.db: 4338 alias = alias.this 4339 return self.expression(exp.PivotAlias, this=this, alias=alias) 4340 4341 return this 4342 4343 value = self._parse_column() 4344 4345 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4346 self.raise_error("Expecting IN (") 4347 4348 if self._match(TokenType.ANY): 4349 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4350 else: 4351 exprs = self._parse_csv(_parse_aliased_expression) 4352 4353 self._match_r_paren() 4354 return self.expression(exp.In, this=value, expressions=exprs) 4355 4356 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4357 func = self._parse_function() 4358 if not func: 4359 self.raise_error("Expecting an aggregation function in PIVOT") 4360 4361 return self._parse_alias(func) 4362 4363 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4364 index = self._index 4365 include_nulls = None 4366 4367 if self._match(TokenType.PIVOT): 4368 unpivot = False 4369 elif self._match(TokenType.UNPIVOT): 4370 unpivot = True 4371 4372 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4373 if self._match_text_seq("INCLUDE", "NULLS"): 4374 include_nulls = True 4375 elif self._match_text_seq("EXCLUDE", "NULLS"): 4376 include_nulls = False 4377 else: 4378 return None 4379 4380 expressions = [] 4381 4382 if not self._match(TokenType.L_PAREN): 4383 self._retreat(index) 4384 return None 4385 4386 if unpivot: 4387 expressions = self._parse_csv(self._parse_column) 4388 else: 4389 expressions = self._parse_csv(self._parse_pivot_aggregation) 4390 4391 if not expressions: 4392 self.raise_error("Failed to parse PIVOT's aggregation list") 4393 4394 if not self._match(TokenType.FOR): 4395 self.raise_error("Expecting FOR") 4396 4397 fields = [] 4398 while True: 4399 field = self._try_parse(self._parse_pivot_in) 4400 if not field: 4401 break 4402 fields.append(field) 4403 4404 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4405 self._parse_bitwise 4406 ) 4407 4408 group = self._parse_group() 4409 4410 self._match_r_paren() 4411 4412 pivot = self.expression( 4413 exp.Pivot, 4414 expressions=expressions, 4415 fields=fields, 4416 unpivot=unpivot, 4417 include_nulls=include_nulls, 4418 default_on_null=default_on_null, 4419 group=group, 4420 ) 4421 4422 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4423 pivot.set("alias", self._parse_table_alias()) 4424 4425 if not unpivot: 4426 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4427 4428 columns: t.List[exp.Expression] = [] 4429 all_fields = [] 4430 for pivot_field in pivot.fields: 4431 pivot_field_expressions = pivot_field.expressions 4432 4433 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4434 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4435 continue 4436 4437 all_fields.append( 4438 [ 4439 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4440 for fld in pivot_field_expressions 4441 ] 4442 ) 4443 4444 if all_fields: 4445 if names: 4446 all_fields.append(names) 4447 4448 # Generate all possible combinations of the pivot columns 4449 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4450 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4451 for fld_parts_tuple in itertools.product(*all_fields): 4452 fld_parts = list(fld_parts_tuple) 4453 4454 if names and self.PREFIXED_PIVOT_COLUMNS: 4455 # Move the "name" to the front of the list 4456 fld_parts.insert(0, fld_parts.pop(-1)) 4457 4458 columns.append(exp.to_identifier("_".join(fld_parts))) 4459 4460 pivot.set("columns", columns) 4461 4462 return pivot 4463 4464 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4465 return [agg.alias for agg in aggregations if agg.alias] 4466 4467 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4468 if not skip_where_token and not self._match(TokenType.PREWHERE): 4469 return None 4470 4471 return self.expression( 4472 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4473 ) 4474 4475 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4476 if not skip_where_token and not self._match(TokenType.WHERE): 4477 return None 4478 4479 return self.expression( 4480 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4481 ) 4482 4483 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4484 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4485 return None 4486 comments = self._prev_comments 4487 4488 elements: t.Dict[str, t.Any] = defaultdict(list) 4489 4490 if self._match(TokenType.ALL): 4491 elements["all"] = True 4492 elif self._match(TokenType.DISTINCT): 4493 elements["all"] = False 4494 4495 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4496 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4497 4498 while True: 4499 index = self._index 4500 4501 elements["expressions"].extend( 4502 self._parse_csv( 4503 lambda: None 4504 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4505 else self._parse_assignment() 4506 ) 4507 ) 4508 4509 before_with_index = self._index 4510 with_prefix = self._match(TokenType.WITH) 4511 4512 if self._match(TokenType.ROLLUP): 4513 elements["rollup"].append( 4514 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4515 ) 4516 elif self._match(TokenType.CUBE): 4517 elements["cube"].append( 4518 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4519 ) 4520 elif self._match(TokenType.GROUPING_SETS): 4521 elements["grouping_sets"].append( 4522 self.expression( 4523 exp.GroupingSets, 4524 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4525 ) 4526 ) 4527 elif self._match_text_seq("TOTALS"): 4528 elements["totals"] = True # type: ignore 4529 4530 if before_with_index <= self._index <= before_with_index + 1: 4531 self._retreat(before_with_index) 4532 break 4533 4534 if index == self._index: 4535 break 4536 4537 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4538 4539 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4540 return self.expression( 4541 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4542 ) 4543 4544 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4545 if self._match(TokenType.L_PAREN): 4546 grouping_set = self._parse_csv(self._parse_column) 4547 self._match_r_paren() 4548 return self.expression(exp.Tuple, expressions=grouping_set) 4549 4550 return self._parse_column() 4551 4552 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4553 if not skip_having_token and not self._match(TokenType.HAVING): 4554 return None 4555 return self.expression( 4556 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4557 ) 4558 4559 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4560 if not self._match(TokenType.QUALIFY): 4561 return None 4562 return self.expression(exp.Qualify, this=self._parse_assignment()) 4563 4564 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4565 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4566 exp.Prior, this=self._parse_bitwise() 4567 ) 4568 connect = self._parse_assignment() 4569 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4570 return connect 4571 4572 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4573 if skip_start_token: 4574 start = None 4575 elif self._match(TokenType.START_WITH): 4576 start = self._parse_assignment() 4577 else: 4578 return None 4579 4580 self._match(TokenType.CONNECT_BY) 4581 nocycle = self._match_text_seq("NOCYCLE") 4582 connect = self._parse_connect_with_prior() 4583 4584 if not start and self._match(TokenType.START_WITH): 4585 start = self._parse_assignment() 4586 4587 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4588 4589 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4590 this = self._parse_id_var(any_token=True) 4591 if self._match(TokenType.ALIAS): 4592 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4593 return this 4594 4595 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4596 if self._match_text_seq("INTERPOLATE"): 4597 return self._parse_wrapped_csv(self._parse_name_as_expression) 4598 return None 4599 4600 def _parse_order( 4601 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4602 ) -> t.Optional[exp.Expression]: 4603 siblings = None 4604 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4605 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4606 return this 4607 4608 siblings = True 4609 4610 return self.expression( 4611 exp.Order, 4612 comments=self._prev_comments, 4613 this=this, 4614 expressions=self._parse_csv(self._parse_ordered), 4615 siblings=siblings, 4616 ) 4617 4618 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4619 if not self._match(token): 4620 return None 4621 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4622 4623 def _parse_ordered( 4624 self, parse_method: t.Optional[t.Callable] = None 4625 ) -> t.Optional[exp.Ordered]: 4626 this = parse_method() if parse_method else self._parse_assignment() 4627 if not this: 4628 return None 4629 4630 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4631 this = exp.var("ALL") 4632 4633 asc = self._match(TokenType.ASC) 4634 desc = self._match(TokenType.DESC) or (asc and False) 4635 4636 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4637 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4638 4639 nulls_first = is_nulls_first or False 4640 explicitly_null_ordered = is_nulls_first or is_nulls_last 4641 4642 if ( 4643 not explicitly_null_ordered 4644 and ( 4645 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4646 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4647 ) 4648 and self.dialect.NULL_ORDERING != "nulls_are_last" 4649 ): 4650 nulls_first = True 4651 4652 if self._match_text_seq("WITH", "FILL"): 4653 with_fill = self.expression( 4654 exp.WithFill, 4655 **{ # type: ignore 4656 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4657 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4658 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4659 "interpolate": self._parse_interpolate(), 4660 }, 4661 ) 4662 else: 4663 with_fill = None 4664 4665 return self.expression( 4666 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4667 ) 4668 4669 def _parse_limit_options(self) -> exp.LimitOptions: 4670 percent = self._match(TokenType.PERCENT) 4671 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4672 self._match_text_seq("ONLY") 4673 with_ties = self._match_text_seq("WITH", "TIES") 4674 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4675 4676 def _parse_limit( 4677 self, 4678 this: t.Optional[exp.Expression] = None, 4679 top: bool = False, 4680 skip_limit_token: bool = False, 4681 ) -> t.Optional[exp.Expression]: 4682 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4683 comments = self._prev_comments 4684 if top: 4685 limit_paren = self._match(TokenType.L_PAREN) 4686 expression = self._parse_term() if limit_paren else self._parse_number() 4687 4688 if limit_paren: 4689 self._match_r_paren() 4690 4691 limit_options = self._parse_limit_options() 4692 else: 4693 limit_options = None 4694 expression = self._parse_term() 4695 4696 if self._match(TokenType.COMMA): 4697 offset = expression 4698 expression = self._parse_term() 4699 else: 4700 offset = None 4701 4702 limit_exp = self.expression( 4703 exp.Limit, 4704 this=this, 4705 expression=expression, 4706 offset=offset, 4707 comments=comments, 4708 limit_options=limit_options, 4709 expressions=self._parse_limit_by(), 4710 ) 4711 4712 return limit_exp 4713 4714 if self._match(TokenType.FETCH): 4715 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4716 direction = self._prev.text.upper() if direction else "FIRST" 4717 4718 count = self._parse_field(tokens=self.FETCH_TOKENS) 4719 4720 return self.expression( 4721 exp.Fetch, 4722 direction=direction, 4723 count=count, 4724 limit_options=self._parse_limit_options(), 4725 ) 4726 4727 return this 4728 4729 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4730 if not self._match(TokenType.OFFSET): 4731 return this 4732 4733 count = self._parse_term() 4734 self._match_set((TokenType.ROW, TokenType.ROWS)) 4735 4736 return self.expression( 4737 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4738 ) 4739 4740 def _can_parse_limit_or_offset(self) -> bool: 4741 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4742 return False 4743 4744 index = self._index 4745 result = bool( 4746 self._try_parse(self._parse_limit, retreat=True) 4747 or self._try_parse(self._parse_offset, retreat=True) 4748 ) 4749 self._retreat(index) 4750 return result 4751 4752 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4753 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4754 4755 def _parse_locks(self) -> t.List[exp.Lock]: 4756 locks = [] 4757 while True: 4758 update, key = None, None 4759 if self._match_text_seq("FOR", "UPDATE"): 4760 update = True 4761 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4762 "LOCK", "IN", "SHARE", "MODE" 4763 ): 4764 update = False 4765 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4766 update, key = False, True 4767 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4768 update, key = True, True 4769 else: 4770 break 4771 4772 expressions = None 4773 if self._match_text_seq("OF"): 4774 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4775 4776 wait: t.Optional[bool | exp.Expression] = None 4777 if self._match_text_seq("NOWAIT"): 4778 wait = True 4779 elif self._match_text_seq("WAIT"): 4780 wait = self._parse_primary() 4781 elif self._match_text_seq("SKIP", "LOCKED"): 4782 wait = False 4783 4784 locks.append( 4785 self.expression( 4786 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4787 ) 4788 ) 4789 4790 return locks 4791 4792 def parse_set_operation( 4793 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4794 ) -> t.Optional[exp.Expression]: 4795 start = self._index 4796 _, side_token, kind_token = self._parse_join_parts() 4797 4798 side = side_token.text if side_token else None 4799 kind = kind_token.text if kind_token else None 4800 4801 if not self._match_set(self.SET_OPERATIONS): 4802 self._retreat(start) 4803 return None 4804 4805 token_type = self._prev.token_type 4806 4807 if token_type == TokenType.UNION: 4808 operation: t.Type[exp.SetOperation] = exp.Union 4809 elif token_type == TokenType.EXCEPT: 4810 operation = exp.Except 4811 else: 4812 operation = exp.Intersect 4813 4814 comments = self._prev.comments 4815 4816 if self._match(TokenType.DISTINCT): 4817 distinct: t.Optional[bool] = True 4818 elif self._match(TokenType.ALL): 4819 distinct = False 4820 else: 4821 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4822 if distinct is None: 4823 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4824 4825 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4826 "STRICT", "CORRESPONDING" 4827 ) 4828 if self._match_text_seq("CORRESPONDING"): 4829 by_name = True 4830 if not side and not kind: 4831 kind = "INNER" 4832 4833 on_column_list = None 4834 if by_name and self._match_texts(("ON", "BY")): 4835 on_column_list = self._parse_wrapped_csv(self._parse_column) 4836 4837 expression = self._parse_select( 4838 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4839 ) 4840 4841 return self.expression( 4842 operation, 4843 comments=comments, 4844 this=this, 4845 distinct=distinct, 4846 by_name=by_name, 4847 expression=expression, 4848 side=side, 4849 kind=kind, 4850 on=on_column_list, 4851 ) 4852 4853 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4854 while this: 4855 setop = self.parse_set_operation(this) 4856 if not setop: 4857 break 4858 this = setop 4859 4860 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4861 expression = this.expression 4862 4863 if expression: 4864 for arg in self.SET_OP_MODIFIERS: 4865 expr = expression.args.get(arg) 4866 if expr: 4867 this.set(arg, expr.pop()) 4868 4869 return this 4870 4871 def _parse_expression(self) -> t.Optional[exp.Expression]: 4872 return self._parse_alias(self._parse_assignment()) 4873 4874 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4875 this = self._parse_disjunction() 4876 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4877 # This allows us to parse <non-identifier token> := <expr> 4878 this = exp.column( 4879 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4880 ) 4881 4882 while self._match_set(self.ASSIGNMENT): 4883 if isinstance(this, exp.Column) and len(this.parts) == 1: 4884 this = this.this 4885 4886 this = self.expression( 4887 self.ASSIGNMENT[self._prev.token_type], 4888 this=this, 4889 comments=self._prev_comments, 4890 expression=self._parse_assignment(), 4891 ) 4892 4893 return this 4894 4895 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4896 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4897 4898 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4899 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4900 4901 def _parse_equality(self) -> t.Optional[exp.Expression]: 4902 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4903 4904 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4905 return self._parse_tokens(self._parse_range, self.COMPARISON) 4906 4907 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4908 this = this or self._parse_bitwise() 4909 negate = self._match(TokenType.NOT) 4910 4911 if self._match_set(self.RANGE_PARSERS): 4912 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4913 if not expression: 4914 return this 4915 4916 this = expression 4917 elif self._match(TokenType.ISNULL): 4918 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4919 4920 # Postgres supports ISNULL and NOTNULL for conditions. 4921 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4922 if self._match(TokenType.NOTNULL): 4923 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4924 this = self.expression(exp.Not, this=this) 4925 4926 if negate: 4927 this = self._negate_range(this) 4928 4929 if self._match(TokenType.IS): 4930 this = self._parse_is(this) 4931 4932 return this 4933 4934 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4935 if not this: 4936 return this 4937 4938 return self.expression(exp.Not, this=this) 4939 4940 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4941 index = self._index - 1 4942 negate = self._match(TokenType.NOT) 4943 4944 if self._match_text_seq("DISTINCT", "FROM"): 4945 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4946 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4947 4948 if self._match(TokenType.JSON): 4949 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4950 4951 if self._match_text_seq("WITH"): 4952 _with = True 4953 elif self._match_text_seq("WITHOUT"): 4954 _with = False 4955 else: 4956 _with = None 4957 4958 unique = self._match(TokenType.UNIQUE) 4959 self._match_text_seq("KEYS") 4960 expression: t.Optional[exp.Expression] = self.expression( 4961 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4962 ) 4963 else: 4964 expression = self._parse_primary() or self._parse_null() 4965 if not expression: 4966 self._retreat(index) 4967 return None 4968 4969 this = self.expression(exp.Is, this=this, expression=expression) 4970 return self.expression(exp.Not, this=this) if negate else this 4971 4972 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4973 unnest = self._parse_unnest(with_alias=False) 4974 if unnest: 4975 this = self.expression(exp.In, this=this, unnest=unnest) 4976 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4977 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4978 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4979 4980 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4981 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4982 else: 4983 this = self.expression(exp.In, this=this, expressions=expressions) 4984 4985 if matched_l_paren: 4986 self._match_r_paren(this) 4987 elif not self._match(TokenType.R_BRACKET, expression=this): 4988 self.raise_error("Expecting ]") 4989 else: 4990 this = self.expression(exp.In, this=this, field=self._parse_column()) 4991 4992 return this 4993 4994 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4995 low = self._parse_bitwise() 4996 self._match(TokenType.AND) 4997 high = self._parse_bitwise() 4998 return self.expression(exp.Between, this=this, low=low, high=high) 4999 5000 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5001 if not self._match(TokenType.ESCAPE): 5002 return this 5003 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5004 5005 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5006 index = self._index 5007 5008 if not self._match(TokenType.INTERVAL) and match_interval: 5009 return None 5010 5011 if self._match(TokenType.STRING, advance=False): 5012 this = self._parse_primary() 5013 else: 5014 this = self._parse_term() 5015 5016 if not this or ( 5017 isinstance(this, exp.Column) 5018 and not this.table 5019 and not this.this.quoted 5020 and this.name.upper() == "IS" 5021 ): 5022 self._retreat(index) 5023 return None 5024 5025 unit = self._parse_function() or ( 5026 not self._match(TokenType.ALIAS, advance=False) 5027 and self._parse_var(any_token=True, upper=True) 5028 ) 5029 5030 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5031 # each INTERVAL expression into this canonical form so it's easy to transpile 5032 if this and this.is_number: 5033 this = exp.Literal.string(this.to_py()) 5034 elif this and this.is_string: 5035 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5036 if parts and unit: 5037 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5038 unit = None 5039 self._retreat(self._index - 1) 5040 5041 if len(parts) == 1: 5042 this = exp.Literal.string(parts[0][0]) 5043 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5044 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5045 unit = self.expression( 5046 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5047 ) 5048 5049 interval = self.expression(exp.Interval, this=this, unit=unit) 5050 5051 index = self._index 5052 self._match(TokenType.PLUS) 5053 5054 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5055 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5056 return self.expression( 5057 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5058 ) 5059 5060 self._retreat(index) 5061 return interval 5062 5063 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5064 this = self._parse_term() 5065 5066 while True: 5067 if self._match_set(self.BITWISE): 5068 this = self.expression( 5069 self.BITWISE[self._prev.token_type], 5070 this=this, 5071 expression=self._parse_term(), 5072 ) 5073 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5074 this = self.expression( 5075 exp.DPipe, 5076 this=this, 5077 expression=self._parse_term(), 5078 safe=not self.dialect.STRICT_STRING_CONCAT, 5079 ) 5080 elif self._match(TokenType.DQMARK): 5081 this = self.expression( 5082 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5083 ) 5084 elif self._match_pair(TokenType.LT, TokenType.LT): 5085 this = self.expression( 5086 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5087 ) 5088 elif self._match_pair(TokenType.GT, TokenType.GT): 5089 this = self.expression( 5090 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5091 ) 5092 else: 5093 break 5094 5095 return this 5096 5097 def _parse_term(self) -> t.Optional[exp.Expression]: 5098 this = self._parse_factor() 5099 5100 while self._match_set(self.TERM): 5101 klass = self.TERM[self._prev.token_type] 5102 comments = self._prev_comments 5103 expression = self._parse_factor() 5104 5105 this = self.expression(klass, this=this, comments=comments, expression=expression) 5106 5107 if isinstance(this, exp.Collate): 5108 expr = this.expression 5109 5110 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5111 # fallback to Identifier / Var 5112 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5113 ident = expr.this 5114 if isinstance(ident, exp.Identifier): 5115 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5116 5117 return this 5118 5119 def _parse_factor(self) -> t.Optional[exp.Expression]: 5120 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5121 this = parse_method() 5122 5123 while self._match_set(self.FACTOR): 5124 klass = self.FACTOR[self._prev.token_type] 5125 comments = self._prev_comments 5126 expression = parse_method() 5127 5128 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5129 self._retreat(self._index - 1) 5130 return this 5131 5132 this = self.expression(klass, this=this, comments=comments, expression=expression) 5133 5134 if isinstance(this, exp.Div): 5135 this.args["typed"] = self.dialect.TYPED_DIVISION 5136 this.args["safe"] = self.dialect.SAFE_DIVISION 5137 5138 return this 5139 5140 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5141 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5142 5143 def _parse_unary(self) -> t.Optional[exp.Expression]: 5144 if self._match_set(self.UNARY_PARSERS): 5145 return self.UNARY_PARSERS[self._prev.token_type](self) 5146 return self._parse_at_time_zone(self._parse_type()) 5147 5148 def _parse_type( 5149 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5150 ) -> t.Optional[exp.Expression]: 5151 interval = parse_interval and self._parse_interval() 5152 if interval: 5153 return interval 5154 5155 index = self._index 5156 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5157 5158 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5159 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5160 if isinstance(data_type, exp.Cast): 5161 # This constructor can contain ops directly after it, for instance struct unnesting: 5162 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5163 return self._parse_column_ops(data_type) 5164 5165 if data_type: 5166 index2 = self._index 5167 this = self._parse_primary() 5168 5169 if isinstance(this, exp.Literal): 5170 literal = this.name 5171 this = self._parse_column_ops(this) 5172 5173 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5174 if parser: 5175 return parser(self, this, data_type) 5176 5177 if ( 5178 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5179 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5180 and TIME_ZONE_RE.search(literal) 5181 ): 5182 data_type = exp.DataType.build("TIMESTAMPTZ") 5183 5184 return self.expression(exp.Cast, this=this, to=data_type) 5185 5186 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5187 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5188 # 5189 # If the index difference here is greater than 1, that means the parser itself must have 5190 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5191 # 5192 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5193 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5194 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5195 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5196 # 5197 # In these cases, we don't really want to return the converted type, but instead retreat 5198 # and try to parse a Column or Identifier in the section below. 5199 if data_type.expressions and index2 - index > 1: 5200 self._retreat(index2) 5201 return self._parse_column_ops(data_type) 5202 5203 self._retreat(index) 5204 5205 if fallback_to_identifier: 5206 return self._parse_id_var() 5207 5208 this = self._parse_column() 5209 return this and self._parse_column_ops(this) 5210 5211 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5212 this = self._parse_type() 5213 if not this: 5214 return None 5215 5216 if isinstance(this, exp.Column) and not this.table: 5217 this = exp.var(this.name.upper()) 5218 5219 return self.expression( 5220 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5221 ) 5222 5223 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5224 type_name = identifier.name 5225 5226 while self._match(TokenType.DOT): 5227 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5228 5229 return exp.DataType.build(type_name, udt=True) 5230 5231 def _parse_types( 5232 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5233 ) -> t.Optional[exp.Expression]: 5234 index = self._index 5235 5236 this: t.Optional[exp.Expression] = None 5237 prefix = self._match_text_seq("SYSUDTLIB", ".") 5238 5239 if not self._match_set(self.TYPE_TOKENS): 5240 identifier = allow_identifiers and self._parse_id_var( 5241 any_token=False, tokens=(TokenType.VAR,) 5242 ) 5243 if isinstance(identifier, exp.Identifier): 5244 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5245 5246 if len(tokens) != 1: 5247 self.raise_error("Unexpected identifier", self._prev) 5248 5249 if tokens[0].token_type in self.TYPE_TOKENS: 5250 self._prev = tokens[0] 5251 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5252 this = self._parse_user_defined_type(identifier) 5253 else: 5254 self._retreat(self._index - 1) 5255 return None 5256 else: 5257 return None 5258 5259 type_token = self._prev.token_type 5260 5261 if type_token == TokenType.PSEUDO_TYPE: 5262 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5263 5264 if type_token == TokenType.OBJECT_IDENTIFIER: 5265 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5266 5267 # https://materialize.com/docs/sql/types/map/ 5268 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5269 key_type = self._parse_types( 5270 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5271 ) 5272 if not self._match(TokenType.FARROW): 5273 self._retreat(index) 5274 return None 5275 5276 value_type = self._parse_types( 5277 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5278 ) 5279 if not self._match(TokenType.R_BRACKET): 5280 self._retreat(index) 5281 return None 5282 5283 return exp.DataType( 5284 this=exp.DataType.Type.MAP, 5285 expressions=[key_type, value_type], 5286 nested=True, 5287 prefix=prefix, 5288 ) 5289 5290 nested = type_token in self.NESTED_TYPE_TOKENS 5291 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5292 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5293 expressions = None 5294 maybe_func = False 5295 5296 if self._match(TokenType.L_PAREN): 5297 if is_struct: 5298 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5299 elif nested: 5300 expressions = self._parse_csv( 5301 lambda: self._parse_types( 5302 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5303 ) 5304 ) 5305 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5306 this = expressions[0] 5307 this.set("nullable", True) 5308 self._match_r_paren() 5309 return this 5310 elif type_token in self.ENUM_TYPE_TOKENS: 5311 expressions = self._parse_csv(self._parse_equality) 5312 elif is_aggregate: 5313 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5314 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5315 ) 5316 if not func_or_ident: 5317 return None 5318 expressions = [func_or_ident] 5319 if self._match(TokenType.COMMA): 5320 expressions.extend( 5321 self._parse_csv( 5322 lambda: self._parse_types( 5323 check_func=check_func, 5324 schema=schema, 5325 allow_identifiers=allow_identifiers, 5326 ) 5327 ) 5328 ) 5329 else: 5330 expressions = self._parse_csv(self._parse_type_size) 5331 5332 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5333 if type_token == TokenType.VECTOR and len(expressions) == 2: 5334 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5335 5336 if not expressions or not self._match(TokenType.R_PAREN): 5337 self._retreat(index) 5338 return None 5339 5340 maybe_func = True 5341 5342 values: t.Optional[t.List[exp.Expression]] = None 5343 5344 if nested and self._match(TokenType.LT): 5345 if is_struct: 5346 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5347 else: 5348 expressions = self._parse_csv( 5349 lambda: self._parse_types( 5350 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5351 ) 5352 ) 5353 5354 if not self._match(TokenType.GT): 5355 self.raise_error("Expecting >") 5356 5357 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5358 values = self._parse_csv(self._parse_assignment) 5359 if not values and is_struct: 5360 values = None 5361 self._retreat(self._index - 1) 5362 else: 5363 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5364 5365 if type_token in self.TIMESTAMPS: 5366 if self._match_text_seq("WITH", "TIME", "ZONE"): 5367 maybe_func = False 5368 tz_type = ( 5369 exp.DataType.Type.TIMETZ 5370 if type_token in self.TIMES 5371 else exp.DataType.Type.TIMESTAMPTZ 5372 ) 5373 this = exp.DataType(this=tz_type, expressions=expressions) 5374 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5375 maybe_func = False 5376 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5377 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5378 maybe_func = False 5379 elif type_token == TokenType.INTERVAL: 5380 unit = self._parse_var(upper=True) 5381 if unit: 5382 if self._match_text_seq("TO"): 5383 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5384 5385 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5386 else: 5387 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5388 elif type_token == TokenType.VOID: 5389 this = exp.DataType(this=exp.DataType.Type.NULL) 5390 5391 if maybe_func and check_func: 5392 index2 = self._index 5393 peek = self._parse_string() 5394 5395 if not peek: 5396 self._retreat(index) 5397 return None 5398 5399 self._retreat(index2) 5400 5401 if not this: 5402 if self._match_text_seq("UNSIGNED"): 5403 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5404 if not unsigned_type_token: 5405 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5406 5407 type_token = unsigned_type_token or type_token 5408 5409 this = exp.DataType( 5410 this=exp.DataType.Type[type_token.value], 5411 expressions=expressions, 5412 nested=nested, 5413 prefix=prefix, 5414 ) 5415 5416 # Empty arrays/structs are allowed 5417 if values is not None: 5418 cls = exp.Struct if is_struct else exp.Array 5419 this = exp.cast(cls(expressions=values), this, copy=False) 5420 5421 elif expressions: 5422 this.set("expressions", expressions) 5423 5424 # https://materialize.com/docs/sql/types/list/#type-name 5425 while self._match(TokenType.LIST): 5426 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5427 5428 index = self._index 5429 5430 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5431 matched_array = self._match(TokenType.ARRAY) 5432 5433 while self._curr: 5434 datatype_token = self._prev.token_type 5435 matched_l_bracket = self._match(TokenType.L_BRACKET) 5436 5437 if (not matched_l_bracket and not matched_array) or ( 5438 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5439 ): 5440 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5441 # not to be confused with the fixed size array parsing 5442 break 5443 5444 matched_array = False 5445 values = self._parse_csv(self._parse_assignment) or None 5446 if ( 5447 values 5448 and not schema 5449 and ( 5450 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5451 ) 5452 ): 5453 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5454 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5455 self._retreat(index) 5456 break 5457 5458 this = exp.DataType( 5459 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5460 ) 5461 self._match(TokenType.R_BRACKET) 5462 5463 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5464 converter = self.TYPE_CONVERTERS.get(this.this) 5465 if converter: 5466 this = converter(t.cast(exp.DataType, this)) 5467 5468 return this 5469 5470 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5471 index = self._index 5472 5473 if ( 5474 self._curr 5475 and self._next 5476 and self._curr.token_type in self.TYPE_TOKENS 5477 and self._next.token_type in self.TYPE_TOKENS 5478 ): 5479 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5480 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5481 this = self._parse_id_var() 5482 else: 5483 this = ( 5484 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5485 or self._parse_id_var() 5486 ) 5487 5488 self._match(TokenType.COLON) 5489 5490 if ( 5491 type_required 5492 and not isinstance(this, exp.DataType) 5493 and not self._match_set(self.TYPE_TOKENS, advance=False) 5494 ): 5495 self._retreat(index) 5496 return self._parse_types() 5497 5498 return self._parse_column_def(this) 5499 5500 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5501 if not self._match_text_seq("AT", "TIME", "ZONE"): 5502 return this 5503 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5504 5505 def _parse_column(self) -> t.Optional[exp.Expression]: 5506 this = self._parse_column_reference() 5507 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5508 5509 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5510 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5511 5512 return column 5513 5514 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5515 this = self._parse_field() 5516 if ( 5517 not this 5518 and self._match(TokenType.VALUES, advance=False) 5519 and self.VALUES_FOLLOWED_BY_PAREN 5520 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5521 ): 5522 this = self._parse_id_var() 5523 5524 if isinstance(this, exp.Identifier): 5525 # We bubble up comments from the Identifier to the Column 5526 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5527 5528 return this 5529 5530 def _parse_colon_as_variant_extract( 5531 self, this: t.Optional[exp.Expression] 5532 ) -> t.Optional[exp.Expression]: 5533 casts = [] 5534 json_path = [] 5535 escape = None 5536 5537 while self._match(TokenType.COLON): 5538 start_index = self._index 5539 5540 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5541 path = self._parse_column_ops( 5542 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5543 ) 5544 5545 # The cast :: operator has a lower precedence than the extraction operator :, so 5546 # we rearrange the AST appropriately to avoid casting the JSON path 5547 while isinstance(path, exp.Cast): 5548 casts.append(path.to) 5549 path = path.this 5550 5551 if casts: 5552 dcolon_offset = next( 5553 i 5554 for i, t in enumerate(self._tokens[start_index:]) 5555 if t.token_type == TokenType.DCOLON 5556 ) 5557 end_token = self._tokens[start_index + dcolon_offset - 1] 5558 else: 5559 end_token = self._prev 5560 5561 if path: 5562 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5563 # it'll roundtrip to a string literal in GET_PATH 5564 if isinstance(path, exp.Identifier) and path.quoted: 5565 escape = True 5566 5567 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5568 5569 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5570 # Databricks transforms it back to the colon/dot notation 5571 if json_path: 5572 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5573 5574 if json_path_expr: 5575 json_path_expr.set("escape", escape) 5576 5577 this = self.expression( 5578 exp.JSONExtract, 5579 this=this, 5580 expression=json_path_expr, 5581 variant_extract=True, 5582 ) 5583 5584 while casts: 5585 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5586 5587 return this 5588 5589 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5590 return self._parse_types() 5591 5592 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5593 this = self._parse_bracket(this) 5594 5595 while self._match_set(self.COLUMN_OPERATORS): 5596 op_token = self._prev.token_type 5597 op = self.COLUMN_OPERATORS.get(op_token) 5598 5599 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5600 field = self._parse_dcolon() 5601 if not field: 5602 self.raise_error("Expected type") 5603 elif op and self._curr: 5604 field = self._parse_column_reference() or self._parse_bracket() 5605 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5606 field = self._parse_column_ops(field) 5607 else: 5608 field = self._parse_field(any_token=True, anonymous_func=True) 5609 5610 # Function calls can be qualified, e.g., x.y.FOO() 5611 # This converts the final AST to a series of Dots leading to the function call 5612 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5613 if isinstance(field, (exp.Func, exp.Window)) and this: 5614 this = this.transform( 5615 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5616 ) 5617 5618 if op: 5619 this = op(self, this, field) 5620 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5621 this = self.expression( 5622 exp.Column, 5623 comments=this.comments, 5624 this=field, 5625 table=this.this, 5626 db=this.args.get("table"), 5627 catalog=this.args.get("db"), 5628 ) 5629 elif isinstance(field, exp.Window): 5630 # Move the exp.Dot's to the window's function 5631 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5632 field.set("this", window_func) 5633 this = field 5634 else: 5635 this = self.expression(exp.Dot, this=this, expression=field) 5636 5637 if field and field.comments: 5638 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5639 5640 this = self._parse_bracket(this) 5641 5642 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5643 5644 def _parse_paren(self) -> t.Optional[exp.Expression]: 5645 if not self._match(TokenType.L_PAREN): 5646 return None 5647 5648 comments = self._prev_comments 5649 query = self._parse_select() 5650 5651 if query: 5652 expressions = [query] 5653 else: 5654 expressions = self._parse_expressions() 5655 5656 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5657 5658 if not this and self._match(TokenType.R_PAREN, advance=False): 5659 this = self.expression(exp.Tuple) 5660 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5661 this = self._parse_subquery(this=this, parse_alias=False) 5662 elif isinstance(this, exp.Subquery): 5663 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5664 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5665 this = self.expression(exp.Tuple, expressions=expressions) 5666 else: 5667 this = self.expression(exp.Paren, this=this) 5668 5669 if this: 5670 this.add_comments(comments) 5671 5672 self._match_r_paren(expression=this) 5673 return this 5674 5675 def _parse_primary(self) -> t.Optional[exp.Expression]: 5676 if self._match_set(self.PRIMARY_PARSERS): 5677 token_type = self._prev.token_type 5678 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5679 5680 if token_type == TokenType.STRING: 5681 expressions = [primary] 5682 while self._match(TokenType.STRING): 5683 expressions.append(exp.Literal.string(self._prev.text)) 5684 5685 if len(expressions) > 1: 5686 return self.expression(exp.Concat, expressions=expressions) 5687 5688 return primary 5689 5690 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5691 return exp.Literal.number(f"0.{self._prev.text}") 5692 5693 return self._parse_paren() 5694 5695 def _parse_field( 5696 self, 5697 any_token: bool = False, 5698 tokens: t.Optional[t.Collection[TokenType]] = None, 5699 anonymous_func: bool = False, 5700 ) -> t.Optional[exp.Expression]: 5701 if anonymous_func: 5702 field = ( 5703 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5704 or self._parse_primary() 5705 ) 5706 else: 5707 field = self._parse_primary() or self._parse_function( 5708 anonymous=anonymous_func, any_token=any_token 5709 ) 5710 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5711 5712 def _parse_function( 5713 self, 5714 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5715 anonymous: bool = False, 5716 optional_parens: bool = True, 5717 any_token: bool = False, 5718 ) -> t.Optional[exp.Expression]: 5719 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5720 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5721 fn_syntax = False 5722 if ( 5723 self._match(TokenType.L_BRACE, advance=False) 5724 and self._next 5725 and self._next.text.upper() == "FN" 5726 ): 5727 self._advance(2) 5728 fn_syntax = True 5729 5730 func = self._parse_function_call( 5731 functions=functions, 5732 anonymous=anonymous, 5733 optional_parens=optional_parens, 5734 any_token=any_token, 5735 ) 5736 5737 if fn_syntax: 5738 self._match(TokenType.R_BRACE) 5739 5740 return func 5741 5742 def _parse_function_call( 5743 self, 5744 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5745 anonymous: bool = False, 5746 optional_parens: bool = True, 5747 any_token: bool = False, 5748 ) -> t.Optional[exp.Expression]: 5749 if not self._curr: 5750 return None 5751 5752 comments = self._curr.comments 5753 token = self._curr 5754 token_type = self._curr.token_type 5755 this = self._curr.text 5756 upper = this.upper() 5757 5758 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5759 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5760 self._advance() 5761 return self._parse_window(parser(self)) 5762 5763 if not self._next or self._next.token_type != TokenType.L_PAREN: 5764 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5765 self._advance() 5766 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5767 5768 return None 5769 5770 if any_token: 5771 if token_type in self.RESERVED_TOKENS: 5772 return None 5773 elif token_type not in self.FUNC_TOKENS: 5774 return None 5775 5776 self._advance(2) 5777 5778 parser = self.FUNCTION_PARSERS.get(upper) 5779 if parser and not anonymous: 5780 this = parser(self) 5781 else: 5782 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5783 5784 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5785 this = self.expression( 5786 subquery_predicate, comments=comments, this=self._parse_select() 5787 ) 5788 self._match_r_paren() 5789 return this 5790 5791 if functions is None: 5792 functions = self.FUNCTIONS 5793 5794 function = functions.get(upper) 5795 known_function = function and not anonymous 5796 5797 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5798 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5799 5800 post_func_comments = self._curr and self._curr.comments 5801 if known_function and post_func_comments: 5802 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5803 # call we'll construct it as exp.Anonymous, even if it's "known" 5804 if any( 5805 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5806 for comment in post_func_comments 5807 ): 5808 known_function = False 5809 5810 if alias and known_function: 5811 args = self._kv_to_prop_eq(args) 5812 5813 if known_function: 5814 func_builder = t.cast(t.Callable, function) 5815 5816 if "dialect" in func_builder.__code__.co_varnames: 5817 func = func_builder(args, dialect=self.dialect) 5818 else: 5819 func = func_builder(args) 5820 5821 func = self.validate_expression(func, args) 5822 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5823 func.meta["name"] = this 5824 5825 this = func 5826 else: 5827 if token_type == TokenType.IDENTIFIER: 5828 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5829 5830 this = self.expression(exp.Anonymous, this=this, expressions=args) 5831 this = this.update_positions(token) 5832 5833 if isinstance(this, exp.Expression): 5834 this.add_comments(comments) 5835 5836 self._match_r_paren(this) 5837 return self._parse_window(this) 5838 5839 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5840 return expression 5841 5842 def _kv_to_prop_eq( 5843 self, expressions: t.List[exp.Expression], parse_map: bool = False 5844 ) -> t.List[exp.Expression]: 5845 transformed = [] 5846 5847 for index, e in enumerate(expressions): 5848 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5849 if isinstance(e, exp.Alias): 5850 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5851 5852 if not isinstance(e, exp.PropertyEQ): 5853 e = self.expression( 5854 exp.PropertyEQ, 5855 this=e.this if parse_map else exp.to_identifier(e.this.name), 5856 expression=e.expression, 5857 ) 5858 5859 if isinstance(e.this, exp.Column): 5860 e.this.replace(e.this.this) 5861 else: 5862 e = self._to_prop_eq(e, index) 5863 5864 transformed.append(e) 5865 5866 return transformed 5867 5868 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5869 return self._parse_statement() 5870 5871 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5872 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5873 5874 def _parse_user_defined_function( 5875 self, kind: t.Optional[TokenType] = None 5876 ) -> t.Optional[exp.Expression]: 5877 this = self._parse_table_parts(schema=True) 5878 5879 if not self._match(TokenType.L_PAREN): 5880 return this 5881 5882 expressions = self._parse_csv(self._parse_function_parameter) 5883 self._match_r_paren() 5884 return self.expression( 5885 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5886 ) 5887 5888 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5889 literal = self._parse_primary() 5890 if literal: 5891 return self.expression(exp.Introducer, this=token.text, expression=literal) 5892 5893 return self._identifier_expression(token) 5894 5895 def _parse_session_parameter(self) -> exp.SessionParameter: 5896 kind = None 5897 this = self._parse_id_var() or self._parse_primary() 5898 5899 if this and self._match(TokenType.DOT): 5900 kind = this.name 5901 this = self._parse_var() or self._parse_primary() 5902 5903 return self.expression(exp.SessionParameter, this=this, kind=kind) 5904 5905 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5906 return self._parse_id_var() 5907 5908 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5909 index = self._index 5910 5911 if self._match(TokenType.L_PAREN): 5912 expressions = t.cast( 5913 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5914 ) 5915 5916 if not self._match(TokenType.R_PAREN): 5917 self._retreat(index) 5918 else: 5919 expressions = [self._parse_lambda_arg()] 5920 5921 if self._match_set(self.LAMBDAS): 5922 return self.LAMBDAS[self._prev.token_type](self, expressions) 5923 5924 self._retreat(index) 5925 5926 this: t.Optional[exp.Expression] 5927 5928 if self._match(TokenType.DISTINCT): 5929 this = self.expression( 5930 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5931 ) 5932 else: 5933 this = self._parse_select_or_expression(alias=alias) 5934 5935 return self._parse_limit( 5936 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5937 ) 5938 5939 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5940 index = self._index 5941 if not self._match(TokenType.L_PAREN): 5942 return this 5943 5944 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5945 # expr can be of both types 5946 if self._match_set(self.SELECT_START_TOKENS): 5947 self._retreat(index) 5948 return this 5949 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5950 self._match_r_paren() 5951 return self.expression(exp.Schema, this=this, expressions=args) 5952 5953 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5954 return self._parse_column_def(self._parse_field(any_token=True)) 5955 5956 def _parse_column_def( 5957 self, this: t.Optional[exp.Expression], computed_column: bool = True 5958 ) -> t.Optional[exp.Expression]: 5959 # column defs are not really columns, they're identifiers 5960 if isinstance(this, exp.Column): 5961 this = this.this 5962 5963 if not computed_column: 5964 self._match(TokenType.ALIAS) 5965 5966 kind = self._parse_types(schema=True) 5967 5968 if self._match_text_seq("FOR", "ORDINALITY"): 5969 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5970 5971 constraints: t.List[exp.Expression] = [] 5972 5973 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5974 ("ALIAS", "MATERIALIZED") 5975 ): 5976 persisted = self._prev.text.upper() == "MATERIALIZED" 5977 constraint_kind = exp.ComputedColumnConstraint( 5978 this=self._parse_assignment(), 5979 persisted=persisted or self._match_text_seq("PERSISTED"), 5980 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5981 ) 5982 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5983 elif ( 5984 kind 5985 and self._match(TokenType.ALIAS, advance=False) 5986 and ( 5987 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5988 or (self._next and self._next.token_type == TokenType.L_PAREN) 5989 ) 5990 ): 5991 self._advance() 5992 constraints.append( 5993 self.expression( 5994 exp.ColumnConstraint, 5995 kind=exp.ComputedColumnConstraint( 5996 this=self._parse_disjunction(), 5997 persisted=self._match_texts(("STORED", "VIRTUAL")) 5998 and self._prev.text.upper() == "STORED", 5999 ), 6000 ) 6001 ) 6002 6003 while True: 6004 constraint = self._parse_column_constraint() 6005 if not constraint: 6006 break 6007 constraints.append(constraint) 6008 6009 if not kind and not constraints: 6010 return this 6011 6012 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6013 6014 def _parse_auto_increment( 6015 self, 6016 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6017 start = None 6018 increment = None 6019 order = None 6020 6021 if self._match(TokenType.L_PAREN, advance=False): 6022 args = self._parse_wrapped_csv(self._parse_bitwise) 6023 start = seq_get(args, 0) 6024 increment = seq_get(args, 1) 6025 elif self._match_text_seq("START"): 6026 start = self._parse_bitwise() 6027 self._match_text_seq("INCREMENT") 6028 increment = self._parse_bitwise() 6029 if self._match_text_seq("ORDER"): 6030 order = True 6031 elif self._match_text_seq("NOORDER"): 6032 order = False 6033 6034 if start and increment: 6035 return exp.GeneratedAsIdentityColumnConstraint( 6036 start=start, increment=increment, this=False, order=order 6037 ) 6038 6039 return exp.AutoIncrementColumnConstraint() 6040 6041 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6042 if not self._match_text_seq("REFRESH"): 6043 self._retreat(self._index - 1) 6044 return None 6045 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6046 6047 def _parse_compress(self) -> exp.CompressColumnConstraint: 6048 if self._match(TokenType.L_PAREN, advance=False): 6049 return self.expression( 6050 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6051 ) 6052 6053 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6054 6055 def _parse_generated_as_identity( 6056 self, 6057 ) -> ( 6058 exp.GeneratedAsIdentityColumnConstraint 6059 | exp.ComputedColumnConstraint 6060 | exp.GeneratedAsRowColumnConstraint 6061 ): 6062 if self._match_text_seq("BY", "DEFAULT"): 6063 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6064 this = self.expression( 6065 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6066 ) 6067 else: 6068 self._match_text_seq("ALWAYS") 6069 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6070 6071 self._match(TokenType.ALIAS) 6072 6073 if self._match_text_seq("ROW"): 6074 start = self._match_text_seq("START") 6075 if not start: 6076 self._match(TokenType.END) 6077 hidden = self._match_text_seq("HIDDEN") 6078 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6079 6080 identity = self._match_text_seq("IDENTITY") 6081 6082 if self._match(TokenType.L_PAREN): 6083 if self._match(TokenType.START_WITH): 6084 this.set("start", self._parse_bitwise()) 6085 if self._match_text_seq("INCREMENT", "BY"): 6086 this.set("increment", self._parse_bitwise()) 6087 if self._match_text_seq("MINVALUE"): 6088 this.set("minvalue", self._parse_bitwise()) 6089 if self._match_text_seq("MAXVALUE"): 6090 this.set("maxvalue", self._parse_bitwise()) 6091 6092 if self._match_text_seq("CYCLE"): 6093 this.set("cycle", True) 6094 elif self._match_text_seq("NO", "CYCLE"): 6095 this.set("cycle", False) 6096 6097 if not identity: 6098 this.set("expression", self._parse_range()) 6099 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6100 args = self._parse_csv(self._parse_bitwise) 6101 this.set("start", seq_get(args, 0)) 6102 this.set("increment", seq_get(args, 1)) 6103 6104 self._match_r_paren() 6105 6106 return this 6107 6108 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6109 self._match_text_seq("LENGTH") 6110 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6111 6112 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6113 if self._match_text_seq("NULL"): 6114 return self.expression(exp.NotNullColumnConstraint) 6115 if self._match_text_seq("CASESPECIFIC"): 6116 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6117 if self._match_text_seq("FOR", "REPLICATION"): 6118 return self.expression(exp.NotForReplicationColumnConstraint) 6119 6120 # Unconsume the `NOT` token 6121 self._retreat(self._index - 1) 6122 return None 6123 6124 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6125 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6126 6127 procedure_option_follows = ( 6128 self._match(TokenType.WITH, advance=False) 6129 and self._next 6130 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6131 ) 6132 6133 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6134 return self.expression( 6135 exp.ColumnConstraint, 6136 this=this, 6137 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6138 ) 6139 6140 return this 6141 6142 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6143 if not self._match(TokenType.CONSTRAINT): 6144 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6145 6146 return self.expression( 6147 exp.Constraint, 6148 this=self._parse_id_var(), 6149 expressions=self._parse_unnamed_constraints(), 6150 ) 6151 6152 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6153 constraints = [] 6154 while True: 6155 constraint = self._parse_unnamed_constraint() or self._parse_function() 6156 if not constraint: 6157 break 6158 constraints.append(constraint) 6159 6160 return constraints 6161 6162 def _parse_unnamed_constraint( 6163 self, constraints: t.Optional[t.Collection[str]] = None 6164 ) -> t.Optional[exp.Expression]: 6165 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6166 constraints or self.CONSTRAINT_PARSERS 6167 ): 6168 return None 6169 6170 constraint = self._prev.text.upper() 6171 if constraint not in self.CONSTRAINT_PARSERS: 6172 self.raise_error(f"No parser found for schema constraint {constraint}.") 6173 6174 return self.CONSTRAINT_PARSERS[constraint](self) 6175 6176 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6177 return self._parse_id_var(any_token=False) 6178 6179 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6180 self._match_text_seq("KEY") 6181 return self.expression( 6182 exp.UniqueColumnConstraint, 6183 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6184 this=self._parse_schema(self._parse_unique_key()), 6185 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6186 on_conflict=self._parse_on_conflict(), 6187 options=self._parse_key_constraint_options(), 6188 ) 6189 6190 def _parse_key_constraint_options(self) -> t.List[str]: 6191 options = [] 6192 while True: 6193 if not self._curr: 6194 break 6195 6196 if self._match(TokenType.ON): 6197 action = None 6198 on = self._advance_any() and self._prev.text 6199 6200 if self._match_text_seq("NO", "ACTION"): 6201 action = "NO ACTION" 6202 elif self._match_text_seq("CASCADE"): 6203 action = "CASCADE" 6204 elif self._match_text_seq("RESTRICT"): 6205 action = "RESTRICT" 6206 elif self._match_pair(TokenType.SET, TokenType.NULL): 6207 action = "SET NULL" 6208 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6209 action = "SET DEFAULT" 6210 else: 6211 self.raise_error("Invalid key constraint") 6212 6213 options.append(f"ON {on} {action}") 6214 else: 6215 var = self._parse_var_from_options( 6216 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6217 ) 6218 if not var: 6219 break 6220 options.append(var.name) 6221 6222 return options 6223 6224 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6225 if match and not self._match(TokenType.REFERENCES): 6226 return None 6227 6228 expressions = None 6229 this = self._parse_table(schema=True) 6230 options = self._parse_key_constraint_options() 6231 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6232 6233 def _parse_foreign_key(self) -> exp.ForeignKey: 6234 expressions = ( 6235 self._parse_wrapped_id_vars() 6236 if not self._match(TokenType.REFERENCES, advance=False) 6237 else None 6238 ) 6239 reference = self._parse_references() 6240 on_options = {} 6241 6242 while self._match(TokenType.ON): 6243 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6244 self.raise_error("Expected DELETE or UPDATE") 6245 6246 kind = self._prev.text.lower() 6247 6248 if self._match_text_seq("NO", "ACTION"): 6249 action = "NO ACTION" 6250 elif self._match(TokenType.SET): 6251 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6252 action = "SET " + self._prev.text.upper() 6253 else: 6254 self._advance() 6255 action = self._prev.text.upper() 6256 6257 on_options[kind] = action 6258 6259 return self.expression( 6260 exp.ForeignKey, 6261 expressions=expressions, 6262 reference=reference, 6263 options=self._parse_key_constraint_options(), 6264 **on_options, # type: ignore 6265 ) 6266 6267 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6268 return self._parse_ordered() or self._parse_field() 6269 6270 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6271 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6272 self._retreat(self._index - 1) 6273 return None 6274 6275 id_vars = self._parse_wrapped_id_vars() 6276 return self.expression( 6277 exp.PeriodForSystemTimeConstraint, 6278 this=seq_get(id_vars, 0), 6279 expression=seq_get(id_vars, 1), 6280 ) 6281 6282 def _parse_primary_key( 6283 self, wrapped_optional: bool = False, in_props: bool = False 6284 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6285 desc = ( 6286 self._match_set((TokenType.ASC, TokenType.DESC)) 6287 and self._prev.token_type == TokenType.DESC 6288 ) 6289 6290 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6291 return self.expression( 6292 exp.PrimaryKeyColumnConstraint, 6293 desc=desc, 6294 options=self._parse_key_constraint_options(), 6295 ) 6296 6297 expressions = self._parse_wrapped_csv( 6298 self._parse_primary_key_part, optional=wrapped_optional 6299 ) 6300 options = self._parse_key_constraint_options() 6301 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6302 6303 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6304 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6305 6306 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6307 """ 6308 Parses a datetime column in ODBC format. We parse the column into the corresponding 6309 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6310 same as we did for `DATE('yyyy-mm-dd')`. 6311 6312 Reference: 6313 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6314 """ 6315 self._match(TokenType.VAR) 6316 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6317 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6318 if not self._match(TokenType.R_BRACE): 6319 self.raise_error("Expected }") 6320 return expression 6321 6322 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6323 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6324 return this 6325 6326 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6327 map_token = seq_get(self._tokens, self._index - 2) 6328 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6329 else: 6330 parse_map = False 6331 6332 bracket_kind = self._prev.token_type 6333 if ( 6334 bracket_kind == TokenType.L_BRACE 6335 and self._curr 6336 and self._curr.token_type == TokenType.VAR 6337 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6338 ): 6339 return self._parse_odbc_datetime_literal() 6340 6341 expressions = self._parse_csv( 6342 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6343 ) 6344 6345 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6346 self.raise_error("Expected ]") 6347 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6348 self.raise_error("Expected }") 6349 6350 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6351 if bracket_kind == TokenType.L_BRACE: 6352 this = self.expression( 6353 exp.Struct, 6354 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6355 ) 6356 elif not this: 6357 this = build_array_constructor( 6358 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6359 ) 6360 else: 6361 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6362 if constructor_type: 6363 return build_array_constructor( 6364 constructor_type, 6365 args=expressions, 6366 bracket_kind=bracket_kind, 6367 dialect=self.dialect, 6368 ) 6369 6370 expressions = apply_index_offset( 6371 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6372 ) 6373 this = self.expression( 6374 exp.Bracket, 6375 this=this, 6376 expressions=expressions, 6377 comments=this.pop_comments(), 6378 ) 6379 6380 self._add_comments(this) 6381 return self._parse_bracket(this) 6382 6383 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6384 if self._match(TokenType.COLON): 6385 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6386 return this 6387 6388 def _parse_case(self) -> t.Optional[exp.Expression]: 6389 ifs = [] 6390 default = None 6391 6392 comments = self._prev_comments 6393 expression = self._parse_assignment() 6394 6395 while self._match(TokenType.WHEN): 6396 this = self._parse_assignment() 6397 self._match(TokenType.THEN) 6398 then = self._parse_assignment() 6399 ifs.append(self.expression(exp.If, this=this, true=then)) 6400 6401 if self._match(TokenType.ELSE): 6402 default = self._parse_assignment() 6403 6404 if not self._match(TokenType.END): 6405 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6406 default = exp.column("interval") 6407 else: 6408 self.raise_error("Expected END after CASE", self._prev) 6409 6410 return self.expression( 6411 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6412 ) 6413 6414 def _parse_if(self) -> t.Optional[exp.Expression]: 6415 if self._match(TokenType.L_PAREN): 6416 args = self._parse_csv( 6417 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6418 ) 6419 this = self.validate_expression(exp.If.from_arg_list(args), args) 6420 self._match_r_paren() 6421 else: 6422 index = self._index - 1 6423 6424 if self.NO_PAREN_IF_COMMANDS and index == 0: 6425 return self._parse_as_command(self._prev) 6426 6427 condition = self._parse_assignment() 6428 6429 if not condition: 6430 self._retreat(index) 6431 return None 6432 6433 self._match(TokenType.THEN) 6434 true = self._parse_assignment() 6435 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6436 self._match(TokenType.END) 6437 this = self.expression(exp.If, this=condition, true=true, false=false) 6438 6439 return this 6440 6441 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6442 if not self._match_text_seq("VALUE", "FOR"): 6443 self._retreat(self._index - 1) 6444 return None 6445 6446 return self.expression( 6447 exp.NextValueFor, 6448 this=self._parse_column(), 6449 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6450 ) 6451 6452 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6453 this = self._parse_function() or self._parse_var_or_string(upper=True) 6454 6455 if self._match(TokenType.FROM): 6456 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6457 6458 if not self._match(TokenType.COMMA): 6459 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6460 6461 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6462 6463 def _parse_gap_fill(self) -> exp.GapFill: 6464 self._match(TokenType.TABLE) 6465 this = self._parse_table() 6466 6467 self._match(TokenType.COMMA) 6468 args = [this, *self._parse_csv(self._parse_lambda)] 6469 6470 gap_fill = exp.GapFill.from_arg_list(args) 6471 return self.validate_expression(gap_fill, args) 6472 6473 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6474 this = self._parse_assignment() 6475 6476 if not self._match(TokenType.ALIAS): 6477 if self._match(TokenType.COMMA): 6478 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6479 6480 self.raise_error("Expected AS after CAST") 6481 6482 fmt = None 6483 to = self._parse_types() 6484 6485 default = self._match(TokenType.DEFAULT) 6486 if default: 6487 default = self._parse_bitwise() 6488 self._match_text_seq("ON", "CONVERSION", "ERROR") 6489 6490 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6491 fmt_string = self._parse_string() 6492 fmt = self._parse_at_time_zone(fmt_string) 6493 6494 if not to: 6495 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6496 if to.this in exp.DataType.TEMPORAL_TYPES: 6497 this = self.expression( 6498 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6499 this=this, 6500 format=exp.Literal.string( 6501 format_time( 6502 fmt_string.this if fmt_string else "", 6503 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6504 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6505 ) 6506 ), 6507 safe=safe, 6508 ) 6509 6510 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6511 this.set("zone", fmt.args["zone"]) 6512 return this 6513 elif not to: 6514 self.raise_error("Expected TYPE after CAST") 6515 elif isinstance(to, exp.Identifier): 6516 to = exp.DataType.build(to.name, udt=True) 6517 elif to.this == exp.DataType.Type.CHAR: 6518 if self._match(TokenType.CHARACTER_SET): 6519 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6520 6521 return self.expression( 6522 exp.Cast if strict else exp.TryCast, 6523 this=this, 6524 to=to, 6525 format=fmt, 6526 safe=safe, 6527 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6528 default=default, 6529 ) 6530 6531 def _parse_string_agg(self) -> exp.GroupConcat: 6532 if self._match(TokenType.DISTINCT): 6533 args: t.List[t.Optional[exp.Expression]] = [ 6534 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6535 ] 6536 if self._match(TokenType.COMMA): 6537 args.extend(self._parse_csv(self._parse_assignment)) 6538 else: 6539 args = self._parse_csv(self._parse_assignment) # type: ignore 6540 6541 if self._match_text_seq("ON", "OVERFLOW"): 6542 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6543 if self._match_text_seq("ERROR"): 6544 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6545 else: 6546 self._match_text_seq("TRUNCATE") 6547 on_overflow = self.expression( 6548 exp.OverflowTruncateBehavior, 6549 this=self._parse_string(), 6550 with_count=( 6551 self._match_text_seq("WITH", "COUNT") 6552 or not self._match_text_seq("WITHOUT", "COUNT") 6553 ), 6554 ) 6555 else: 6556 on_overflow = None 6557 6558 index = self._index 6559 if not self._match(TokenType.R_PAREN) and args: 6560 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6561 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6562 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6563 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6564 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6565 6566 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6567 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6568 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6569 if not self._match_text_seq("WITHIN", "GROUP"): 6570 self._retreat(index) 6571 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6572 6573 # The corresponding match_r_paren will be called in parse_function (caller) 6574 self._match_l_paren() 6575 6576 return self.expression( 6577 exp.GroupConcat, 6578 this=self._parse_order(this=seq_get(args, 0)), 6579 separator=seq_get(args, 1), 6580 on_overflow=on_overflow, 6581 ) 6582 6583 def _parse_convert( 6584 self, strict: bool, safe: t.Optional[bool] = None 6585 ) -> t.Optional[exp.Expression]: 6586 this = self._parse_bitwise() 6587 6588 if self._match(TokenType.USING): 6589 to: t.Optional[exp.Expression] = self.expression( 6590 exp.CharacterSet, this=self._parse_var() 6591 ) 6592 elif self._match(TokenType.COMMA): 6593 to = self._parse_types() 6594 else: 6595 to = None 6596 6597 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6598 6599 def _parse_xml_table(self) -> exp.XMLTable: 6600 namespaces = None 6601 passing = None 6602 columns = None 6603 6604 if self._match_text_seq("XMLNAMESPACES", "("): 6605 namespaces = self._parse_xml_namespace() 6606 self._match_text_seq(")", ",") 6607 6608 this = self._parse_string() 6609 6610 if self._match_text_seq("PASSING"): 6611 # The BY VALUE keywords are optional and are provided for semantic clarity 6612 self._match_text_seq("BY", "VALUE") 6613 passing = self._parse_csv(self._parse_column) 6614 6615 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6616 6617 if self._match_text_seq("COLUMNS"): 6618 columns = self._parse_csv(self._parse_field_def) 6619 6620 return self.expression( 6621 exp.XMLTable, 6622 this=this, 6623 namespaces=namespaces, 6624 passing=passing, 6625 columns=columns, 6626 by_ref=by_ref, 6627 ) 6628 6629 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6630 namespaces = [] 6631 6632 while True: 6633 if self._match(TokenType.DEFAULT): 6634 uri = self._parse_string() 6635 else: 6636 uri = self._parse_alias(self._parse_string()) 6637 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6638 if not self._match(TokenType.COMMA): 6639 break 6640 6641 return namespaces 6642 6643 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6644 args = self._parse_csv(self._parse_assignment) 6645 6646 if len(args) < 3: 6647 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6648 6649 return self.expression(exp.DecodeCase, expressions=args) 6650 6651 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6652 self._match_text_seq("KEY") 6653 key = self._parse_column() 6654 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6655 self._match_text_seq("VALUE") 6656 value = self._parse_bitwise() 6657 6658 if not key and not value: 6659 return None 6660 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6661 6662 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6663 if not this or not self._match_text_seq("FORMAT", "JSON"): 6664 return this 6665 6666 return self.expression(exp.FormatJson, this=this) 6667 6668 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6669 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6670 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6671 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6672 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6673 else: 6674 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6675 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6676 6677 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6678 6679 if not empty and not error and not null: 6680 return None 6681 6682 return self.expression( 6683 exp.OnCondition, 6684 empty=empty, 6685 error=error, 6686 null=null, 6687 ) 6688 6689 def _parse_on_handling( 6690 self, on: str, *values: str 6691 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6692 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6693 for value in values: 6694 if self._match_text_seq(value, "ON", on): 6695 return f"{value} ON {on}" 6696 6697 index = self._index 6698 if self._match(TokenType.DEFAULT): 6699 default_value = self._parse_bitwise() 6700 if self._match_text_seq("ON", on): 6701 return default_value 6702 6703 self._retreat(index) 6704 6705 return None 6706 6707 @t.overload 6708 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6709 6710 @t.overload 6711 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6712 6713 def _parse_json_object(self, agg=False): 6714 star = self._parse_star() 6715 expressions = ( 6716 [star] 6717 if star 6718 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6719 ) 6720 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6721 6722 unique_keys = None 6723 if self._match_text_seq("WITH", "UNIQUE"): 6724 unique_keys = True 6725 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6726 unique_keys = False 6727 6728 self._match_text_seq("KEYS") 6729 6730 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6731 self._parse_type() 6732 ) 6733 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6734 6735 return self.expression( 6736 exp.JSONObjectAgg if agg else exp.JSONObject, 6737 expressions=expressions, 6738 null_handling=null_handling, 6739 unique_keys=unique_keys, 6740 return_type=return_type, 6741 encoding=encoding, 6742 ) 6743 6744 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6745 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6746 if not self._match_text_seq("NESTED"): 6747 this = self._parse_id_var() 6748 kind = self._parse_types(allow_identifiers=False) 6749 nested = None 6750 else: 6751 this = None 6752 kind = None 6753 nested = True 6754 6755 path = self._match_text_seq("PATH") and self._parse_string() 6756 nested_schema = nested and self._parse_json_schema() 6757 6758 return self.expression( 6759 exp.JSONColumnDef, 6760 this=this, 6761 kind=kind, 6762 path=path, 6763 nested_schema=nested_schema, 6764 ) 6765 6766 def _parse_json_schema(self) -> exp.JSONSchema: 6767 self._match_text_seq("COLUMNS") 6768 return self.expression( 6769 exp.JSONSchema, 6770 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6771 ) 6772 6773 def _parse_json_table(self) -> exp.JSONTable: 6774 this = self._parse_format_json(self._parse_bitwise()) 6775 path = self._match(TokenType.COMMA) and self._parse_string() 6776 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6777 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6778 schema = self._parse_json_schema() 6779 6780 return exp.JSONTable( 6781 this=this, 6782 schema=schema, 6783 path=path, 6784 error_handling=error_handling, 6785 empty_handling=empty_handling, 6786 ) 6787 6788 def _parse_match_against(self) -> exp.MatchAgainst: 6789 expressions = self._parse_csv(self._parse_column) 6790 6791 self._match_text_seq(")", "AGAINST", "(") 6792 6793 this = self._parse_string() 6794 6795 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6796 modifier = "IN NATURAL LANGUAGE MODE" 6797 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6798 modifier = f"{modifier} WITH QUERY EXPANSION" 6799 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6800 modifier = "IN BOOLEAN MODE" 6801 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6802 modifier = "WITH QUERY EXPANSION" 6803 else: 6804 modifier = None 6805 6806 return self.expression( 6807 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6808 ) 6809 6810 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6811 def _parse_open_json(self) -> exp.OpenJSON: 6812 this = self._parse_bitwise() 6813 path = self._match(TokenType.COMMA) and self._parse_string() 6814 6815 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6816 this = self._parse_field(any_token=True) 6817 kind = self._parse_types() 6818 path = self._parse_string() 6819 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6820 6821 return self.expression( 6822 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6823 ) 6824 6825 expressions = None 6826 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6827 self._match_l_paren() 6828 expressions = self._parse_csv(_parse_open_json_column_def) 6829 6830 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6831 6832 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6833 args = self._parse_csv(self._parse_bitwise) 6834 6835 if self._match(TokenType.IN): 6836 return self.expression( 6837 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6838 ) 6839 6840 if haystack_first: 6841 haystack = seq_get(args, 0) 6842 needle = seq_get(args, 1) 6843 else: 6844 haystack = seq_get(args, 1) 6845 needle = seq_get(args, 0) 6846 6847 return self.expression( 6848 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6849 ) 6850 6851 def _parse_predict(self) -> exp.Predict: 6852 self._match_text_seq("MODEL") 6853 this = self._parse_table() 6854 6855 self._match(TokenType.COMMA) 6856 self._match_text_seq("TABLE") 6857 6858 return self.expression( 6859 exp.Predict, 6860 this=this, 6861 expression=self._parse_table(), 6862 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6863 ) 6864 6865 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6866 args = self._parse_csv(self._parse_table) 6867 return exp.JoinHint(this=func_name.upper(), expressions=args) 6868 6869 def _parse_substring(self) -> exp.Substring: 6870 # Postgres supports the form: substring(string [from int] [for int]) 6871 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6872 6873 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6874 6875 if self._match(TokenType.FROM): 6876 args.append(self._parse_bitwise()) 6877 if self._match(TokenType.FOR): 6878 if len(args) == 1: 6879 args.append(exp.Literal.number(1)) 6880 args.append(self._parse_bitwise()) 6881 6882 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6883 6884 def _parse_trim(self) -> exp.Trim: 6885 # https://www.w3resource.com/sql/character-functions/trim.php 6886 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6887 6888 position = None 6889 collation = None 6890 expression = None 6891 6892 if self._match_texts(self.TRIM_TYPES): 6893 position = self._prev.text.upper() 6894 6895 this = self._parse_bitwise() 6896 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6897 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6898 expression = self._parse_bitwise() 6899 6900 if invert_order: 6901 this, expression = expression, this 6902 6903 if self._match(TokenType.COLLATE): 6904 collation = self._parse_bitwise() 6905 6906 return self.expression( 6907 exp.Trim, this=this, position=position, expression=expression, collation=collation 6908 ) 6909 6910 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6911 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6912 6913 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6914 return self._parse_window(self._parse_id_var(), alias=True) 6915 6916 def _parse_respect_or_ignore_nulls( 6917 self, this: t.Optional[exp.Expression] 6918 ) -> t.Optional[exp.Expression]: 6919 if self._match_text_seq("IGNORE", "NULLS"): 6920 return self.expression(exp.IgnoreNulls, this=this) 6921 if self._match_text_seq("RESPECT", "NULLS"): 6922 return self.expression(exp.RespectNulls, this=this) 6923 return this 6924 6925 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6926 if self._match(TokenType.HAVING): 6927 self._match_texts(("MAX", "MIN")) 6928 max = self._prev.text.upper() != "MIN" 6929 return self.expression( 6930 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6931 ) 6932 6933 return this 6934 6935 def _parse_window( 6936 self, this: t.Optional[exp.Expression], alias: bool = False 6937 ) -> t.Optional[exp.Expression]: 6938 func = this 6939 comments = func.comments if isinstance(func, exp.Expression) else None 6940 6941 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6942 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6943 if self._match_text_seq("WITHIN", "GROUP"): 6944 order = self._parse_wrapped(self._parse_order) 6945 this = self.expression(exp.WithinGroup, this=this, expression=order) 6946 6947 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6948 self._match(TokenType.WHERE) 6949 this = self.expression( 6950 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6951 ) 6952 self._match_r_paren() 6953 6954 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6955 # Some dialects choose to implement and some do not. 6956 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6957 6958 # There is some code above in _parse_lambda that handles 6959 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6960 6961 # The below changes handle 6962 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6963 6964 # Oracle allows both formats 6965 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6966 # and Snowflake chose to do the same for familiarity 6967 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6968 if isinstance(this, exp.AggFunc): 6969 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6970 6971 if ignore_respect and ignore_respect is not this: 6972 ignore_respect.replace(ignore_respect.this) 6973 this = self.expression(ignore_respect.__class__, this=this) 6974 6975 this = self._parse_respect_or_ignore_nulls(this) 6976 6977 # bigquery select from window x AS (partition by ...) 6978 if alias: 6979 over = None 6980 self._match(TokenType.ALIAS) 6981 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6982 return this 6983 else: 6984 over = self._prev.text.upper() 6985 6986 if comments and isinstance(func, exp.Expression): 6987 func.pop_comments() 6988 6989 if not self._match(TokenType.L_PAREN): 6990 return self.expression( 6991 exp.Window, 6992 comments=comments, 6993 this=this, 6994 alias=self._parse_id_var(False), 6995 over=over, 6996 ) 6997 6998 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6999 7000 first = self._match(TokenType.FIRST) 7001 if self._match_text_seq("LAST"): 7002 first = False 7003 7004 partition, order = self._parse_partition_and_order() 7005 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7006 7007 if kind: 7008 self._match(TokenType.BETWEEN) 7009 start = self._parse_window_spec() 7010 self._match(TokenType.AND) 7011 end = self._parse_window_spec() 7012 exclude = ( 7013 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7014 if self._match_text_seq("EXCLUDE") 7015 else None 7016 ) 7017 7018 spec = self.expression( 7019 exp.WindowSpec, 7020 kind=kind, 7021 start=start["value"], 7022 start_side=start["side"], 7023 end=end["value"], 7024 end_side=end["side"], 7025 exclude=exclude, 7026 ) 7027 else: 7028 spec = None 7029 7030 self._match_r_paren() 7031 7032 window = self.expression( 7033 exp.Window, 7034 comments=comments, 7035 this=this, 7036 partition_by=partition, 7037 order=order, 7038 spec=spec, 7039 alias=window_alias, 7040 over=over, 7041 first=first, 7042 ) 7043 7044 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 7045 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7046 return self._parse_window(window, alias=alias) 7047 7048 return window 7049 7050 def _parse_partition_and_order( 7051 self, 7052 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7053 return self._parse_partition_by(), self._parse_order() 7054 7055 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7056 self._match(TokenType.BETWEEN) 7057 7058 return { 7059 "value": ( 7060 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7061 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7062 or self._parse_bitwise() 7063 ), 7064 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7065 } 7066 7067 def _parse_alias( 7068 self, this: t.Optional[exp.Expression], explicit: bool = False 7069 ) -> t.Optional[exp.Expression]: 7070 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7071 # so this section tries to parse the clause version and if it fails, it treats the token 7072 # as an identifier (alias) 7073 if self._can_parse_limit_or_offset(): 7074 return this 7075 7076 any_token = self._match(TokenType.ALIAS) 7077 comments = self._prev_comments or [] 7078 7079 if explicit and not any_token: 7080 return this 7081 7082 if self._match(TokenType.L_PAREN): 7083 aliases = self.expression( 7084 exp.Aliases, 7085 comments=comments, 7086 this=this, 7087 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7088 ) 7089 self._match_r_paren(aliases) 7090 return aliases 7091 7092 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7093 self.STRING_ALIASES and self._parse_string_as_identifier() 7094 ) 7095 7096 if alias: 7097 comments.extend(alias.pop_comments()) 7098 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7099 column = this.this 7100 7101 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7102 if not this.comments and column and column.comments: 7103 this.comments = column.pop_comments() 7104 7105 return this 7106 7107 def _parse_id_var( 7108 self, 7109 any_token: bool = True, 7110 tokens: t.Optional[t.Collection[TokenType]] = None, 7111 ) -> t.Optional[exp.Expression]: 7112 expression = self._parse_identifier() 7113 if not expression and ( 7114 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7115 ): 7116 quoted = self._prev.token_type == TokenType.STRING 7117 expression = self._identifier_expression(quoted=quoted) 7118 7119 return expression 7120 7121 def _parse_string(self) -> t.Optional[exp.Expression]: 7122 if self._match_set(self.STRING_PARSERS): 7123 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7124 return self._parse_placeholder() 7125 7126 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7127 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7128 if output: 7129 output.update_positions(self._prev) 7130 return output 7131 7132 def _parse_number(self) -> t.Optional[exp.Expression]: 7133 if self._match_set(self.NUMERIC_PARSERS): 7134 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7135 return self._parse_placeholder() 7136 7137 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7138 if self._match(TokenType.IDENTIFIER): 7139 return self._identifier_expression(quoted=True) 7140 return self._parse_placeholder() 7141 7142 def _parse_var( 7143 self, 7144 any_token: bool = False, 7145 tokens: t.Optional[t.Collection[TokenType]] = None, 7146 upper: bool = False, 7147 ) -> t.Optional[exp.Expression]: 7148 if ( 7149 (any_token and self._advance_any()) 7150 or self._match(TokenType.VAR) 7151 or (self._match_set(tokens) if tokens else False) 7152 ): 7153 return self.expression( 7154 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7155 ) 7156 return self._parse_placeholder() 7157 7158 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7159 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7160 self._advance() 7161 return self._prev 7162 return None 7163 7164 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7165 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7166 7167 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7168 return self._parse_primary() or self._parse_var(any_token=True) 7169 7170 def _parse_null(self) -> t.Optional[exp.Expression]: 7171 if self._match_set(self.NULL_TOKENS): 7172 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7173 return self._parse_placeholder() 7174 7175 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7176 if self._match(TokenType.TRUE): 7177 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7178 if self._match(TokenType.FALSE): 7179 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7180 return self._parse_placeholder() 7181 7182 def _parse_star(self) -> t.Optional[exp.Expression]: 7183 if self._match(TokenType.STAR): 7184 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7185 return self._parse_placeholder() 7186 7187 def _parse_parameter(self) -> exp.Parameter: 7188 this = self._parse_identifier() or self._parse_primary_or_var() 7189 return self.expression(exp.Parameter, this=this) 7190 7191 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7192 if self._match_set(self.PLACEHOLDER_PARSERS): 7193 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7194 if placeholder: 7195 return placeholder 7196 self._advance(-1) 7197 return None 7198 7199 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7200 if not self._match_texts(keywords): 7201 return None 7202 if self._match(TokenType.L_PAREN, advance=False): 7203 return self._parse_wrapped_csv(self._parse_expression) 7204 7205 expression = self._parse_expression() 7206 return [expression] if expression else None 7207 7208 def _parse_csv( 7209 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7210 ) -> t.List[exp.Expression]: 7211 parse_result = parse_method() 7212 items = [parse_result] if parse_result is not None else [] 7213 7214 while self._match(sep): 7215 self._add_comments(parse_result) 7216 parse_result = parse_method() 7217 if parse_result is not None: 7218 items.append(parse_result) 7219 7220 return items 7221 7222 def _parse_tokens( 7223 self, parse_method: t.Callable, expressions: t.Dict 7224 ) -> t.Optional[exp.Expression]: 7225 this = parse_method() 7226 7227 while self._match_set(expressions): 7228 this = self.expression( 7229 expressions[self._prev.token_type], 7230 this=this, 7231 comments=self._prev_comments, 7232 expression=parse_method(), 7233 ) 7234 7235 return this 7236 7237 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7238 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7239 7240 def _parse_wrapped_csv( 7241 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7242 ) -> t.List[exp.Expression]: 7243 return self._parse_wrapped( 7244 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7245 ) 7246 7247 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7248 wrapped = self._match(TokenType.L_PAREN) 7249 if not wrapped and not optional: 7250 self.raise_error("Expecting (") 7251 parse_result = parse_method() 7252 if wrapped: 7253 self._match_r_paren() 7254 return parse_result 7255 7256 def _parse_expressions(self) -> t.List[exp.Expression]: 7257 return self._parse_csv(self._parse_expression) 7258 7259 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7260 return self._parse_select() or self._parse_set_operations( 7261 self._parse_alias(self._parse_assignment(), explicit=True) 7262 if alias 7263 else self._parse_assignment() 7264 ) 7265 7266 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7267 return self._parse_query_modifiers( 7268 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7269 ) 7270 7271 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7272 this = None 7273 if self._match_texts(self.TRANSACTION_KIND): 7274 this = self._prev.text 7275 7276 self._match_texts(("TRANSACTION", "WORK")) 7277 7278 modes = [] 7279 while True: 7280 mode = [] 7281 while self._match(TokenType.VAR): 7282 mode.append(self._prev.text) 7283 7284 if mode: 7285 modes.append(" ".join(mode)) 7286 if not self._match(TokenType.COMMA): 7287 break 7288 7289 return self.expression(exp.Transaction, this=this, modes=modes) 7290 7291 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7292 chain = None 7293 savepoint = None 7294 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7295 7296 self._match_texts(("TRANSACTION", "WORK")) 7297 7298 if self._match_text_seq("TO"): 7299 self._match_text_seq("SAVEPOINT") 7300 savepoint = self._parse_id_var() 7301 7302 if self._match(TokenType.AND): 7303 chain = not self._match_text_seq("NO") 7304 self._match_text_seq("CHAIN") 7305 7306 if is_rollback: 7307 return self.expression(exp.Rollback, savepoint=savepoint) 7308 7309 return self.expression(exp.Commit, chain=chain) 7310 7311 def _parse_refresh(self) -> exp.Refresh: 7312 self._match(TokenType.TABLE) 7313 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7314 7315 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7316 if not self._prev.text.upper() == "ADD": 7317 return None 7318 7319 start = self._index 7320 self._match(TokenType.COLUMN) 7321 7322 exists_column = self._parse_exists(not_=True) 7323 expression = self._parse_field_def() 7324 7325 if not isinstance(expression, exp.ColumnDef): 7326 self._retreat(start) 7327 return None 7328 7329 expression.set("exists", exists_column) 7330 7331 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7332 if self._match_texts(("FIRST", "AFTER")): 7333 position = self._prev.text 7334 column_position = self.expression( 7335 exp.ColumnPosition, this=self._parse_column(), position=position 7336 ) 7337 expression.set("position", column_position) 7338 7339 return expression 7340 7341 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7342 drop = self._match(TokenType.DROP) and self._parse_drop() 7343 if drop and not isinstance(drop, exp.Command): 7344 drop.set("kind", drop.args.get("kind", "COLUMN")) 7345 return drop 7346 7347 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7348 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7349 return self.expression( 7350 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7351 ) 7352 7353 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7354 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7355 self._match_text_seq("ADD") 7356 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7357 return self.expression( 7358 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7359 ) 7360 7361 column_def = self._parse_add_column() 7362 if isinstance(column_def, exp.ColumnDef): 7363 return column_def 7364 7365 exists = self._parse_exists(not_=True) 7366 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7367 return self.expression( 7368 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7369 ) 7370 7371 return None 7372 7373 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7374 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7375 or self._match_text_seq("COLUMNS") 7376 ): 7377 self._match(TokenType.COLUMN) 7378 7379 schema = self._parse_schema() 7380 7381 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7382 7383 return self._parse_csv(_parse_add_alteration) 7384 7385 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7386 if self._match_texts(self.ALTER_ALTER_PARSERS): 7387 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7388 7389 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7390 # keyword after ALTER we default to parsing this statement 7391 self._match(TokenType.COLUMN) 7392 column = self._parse_field(any_token=True) 7393 7394 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7395 return self.expression(exp.AlterColumn, this=column, drop=True) 7396 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7397 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7398 if self._match(TokenType.COMMENT): 7399 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7400 if self._match_text_seq("DROP", "NOT", "NULL"): 7401 return self.expression( 7402 exp.AlterColumn, 7403 this=column, 7404 drop=True, 7405 allow_null=True, 7406 ) 7407 if self._match_text_seq("SET", "NOT", "NULL"): 7408 return self.expression( 7409 exp.AlterColumn, 7410 this=column, 7411 allow_null=False, 7412 ) 7413 7414 if self._match_text_seq("SET", "VISIBLE"): 7415 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7416 if self._match_text_seq("SET", "INVISIBLE"): 7417 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7418 7419 self._match_text_seq("SET", "DATA") 7420 self._match_text_seq("TYPE") 7421 return self.expression( 7422 exp.AlterColumn, 7423 this=column, 7424 dtype=self._parse_types(), 7425 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7426 using=self._match(TokenType.USING) and self._parse_assignment(), 7427 ) 7428 7429 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7430 if self._match_texts(("ALL", "EVEN", "AUTO")): 7431 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7432 7433 self._match_text_seq("KEY", "DISTKEY") 7434 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7435 7436 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7437 if compound: 7438 self._match_text_seq("SORTKEY") 7439 7440 if self._match(TokenType.L_PAREN, advance=False): 7441 return self.expression( 7442 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7443 ) 7444 7445 self._match_texts(("AUTO", "NONE")) 7446 return self.expression( 7447 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7448 ) 7449 7450 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7451 index = self._index - 1 7452 7453 partition_exists = self._parse_exists() 7454 if self._match(TokenType.PARTITION, advance=False): 7455 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7456 7457 self._retreat(index) 7458 return self._parse_csv(self._parse_drop_column) 7459 7460 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7461 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7462 exists = self._parse_exists() 7463 old_column = self._parse_column() 7464 to = self._match_text_seq("TO") 7465 new_column = self._parse_column() 7466 7467 if old_column is None or to is None or new_column is None: 7468 return None 7469 7470 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7471 7472 self._match_text_seq("TO") 7473 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7474 7475 def _parse_alter_table_set(self) -> exp.AlterSet: 7476 alter_set = self.expression(exp.AlterSet) 7477 7478 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7479 "TABLE", "PROPERTIES" 7480 ): 7481 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7482 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7483 alter_set.set("expressions", [self._parse_assignment()]) 7484 elif self._match_texts(("LOGGED", "UNLOGGED")): 7485 alter_set.set("option", exp.var(self._prev.text.upper())) 7486 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7487 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7488 elif self._match_text_seq("LOCATION"): 7489 alter_set.set("location", self._parse_field()) 7490 elif self._match_text_seq("ACCESS", "METHOD"): 7491 alter_set.set("access_method", self._parse_field()) 7492 elif self._match_text_seq("TABLESPACE"): 7493 alter_set.set("tablespace", self._parse_field()) 7494 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7495 alter_set.set("file_format", [self._parse_field()]) 7496 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7497 alter_set.set("file_format", self._parse_wrapped_options()) 7498 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7499 alter_set.set("copy_options", self._parse_wrapped_options()) 7500 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7501 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7502 else: 7503 if self._match_text_seq("SERDE"): 7504 alter_set.set("serde", self._parse_field()) 7505 7506 properties = self._parse_wrapped(self._parse_properties, optional=True) 7507 alter_set.set("expressions", [properties]) 7508 7509 return alter_set 7510 7511 def _parse_alter(self) -> exp.Alter | exp.Command: 7512 start = self._prev 7513 7514 alter_token = self._match_set(self.ALTERABLES) and self._prev 7515 if not alter_token: 7516 return self._parse_as_command(start) 7517 7518 exists = self._parse_exists() 7519 only = self._match_text_seq("ONLY") 7520 this = self._parse_table(schema=True) 7521 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7522 7523 if self._next: 7524 self._advance() 7525 7526 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7527 if parser: 7528 actions = ensure_list(parser(self)) 7529 not_valid = self._match_text_seq("NOT", "VALID") 7530 options = self._parse_csv(self._parse_property) 7531 7532 if not self._curr and actions: 7533 return self.expression( 7534 exp.Alter, 7535 this=this, 7536 kind=alter_token.text.upper(), 7537 exists=exists, 7538 actions=actions, 7539 only=only, 7540 options=options, 7541 cluster=cluster, 7542 not_valid=not_valid, 7543 ) 7544 7545 return self._parse_as_command(start) 7546 7547 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7548 start = self._prev 7549 # https://duckdb.org/docs/sql/statements/analyze 7550 if not self._curr: 7551 return self.expression(exp.Analyze) 7552 7553 options = [] 7554 while self._match_texts(self.ANALYZE_STYLES): 7555 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7556 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7557 else: 7558 options.append(self._prev.text.upper()) 7559 7560 this: t.Optional[exp.Expression] = None 7561 inner_expression: t.Optional[exp.Expression] = None 7562 7563 kind = self._curr and self._curr.text.upper() 7564 7565 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7566 this = self._parse_table_parts() 7567 elif self._match_text_seq("TABLES"): 7568 if self._match_set((TokenType.FROM, TokenType.IN)): 7569 kind = f"{kind} {self._prev.text.upper()}" 7570 this = self._parse_table(schema=True, is_db_reference=True) 7571 elif self._match_text_seq("DATABASE"): 7572 this = self._parse_table(schema=True, is_db_reference=True) 7573 elif self._match_text_seq("CLUSTER"): 7574 this = self._parse_table() 7575 # Try matching inner expr keywords before fallback to parse table. 7576 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7577 kind = None 7578 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7579 else: 7580 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7581 kind = None 7582 this = self._parse_table_parts() 7583 7584 partition = self._try_parse(self._parse_partition) 7585 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7586 return self._parse_as_command(start) 7587 7588 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7589 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7590 "WITH", "ASYNC", "MODE" 7591 ): 7592 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7593 else: 7594 mode = None 7595 7596 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7597 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7598 7599 properties = self._parse_properties() 7600 return self.expression( 7601 exp.Analyze, 7602 kind=kind, 7603 this=this, 7604 mode=mode, 7605 partition=partition, 7606 properties=properties, 7607 expression=inner_expression, 7608 options=options, 7609 ) 7610 7611 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7612 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7613 this = None 7614 kind = self._prev.text.upper() 7615 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7616 expressions = [] 7617 7618 if not self._match_text_seq("STATISTICS"): 7619 self.raise_error("Expecting token STATISTICS") 7620 7621 if self._match_text_seq("NOSCAN"): 7622 this = "NOSCAN" 7623 elif self._match(TokenType.FOR): 7624 if self._match_text_seq("ALL", "COLUMNS"): 7625 this = "FOR ALL COLUMNS" 7626 if self._match_texts("COLUMNS"): 7627 this = "FOR COLUMNS" 7628 expressions = self._parse_csv(self._parse_column_reference) 7629 elif self._match_text_seq("SAMPLE"): 7630 sample = self._parse_number() 7631 expressions = [ 7632 self.expression( 7633 exp.AnalyzeSample, 7634 sample=sample, 7635 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7636 ) 7637 ] 7638 7639 return self.expression( 7640 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7641 ) 7642 7643 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7644 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7645 kind = None 7646 this = None 7647 expression: t.Optional[exp.Expression] = None 7648 if self._match_text_seq("REF", "UPDATE"): 7649 kind = "REF" 7650 this = "UPDATE" 7651 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7652 this = "UPDATE SET DANGLING TO NULL" 7653 elif self._match_text_seq("STRUCTURE"): 7654 kind = "STRUCTURE" 7655 if self._match_text_seq("CASCADE", "FAST"): 7656 this = "CASCADE FAST" 7657 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7658 ("ONLINE", "OFFLINE") 7659 ): 7660 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7661 expression = self._parse_into() 7662 7663 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7664 7665 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7666 this = self._prev.text.upper() 7667 if self._match_text_seq("COLUMNS"): 7668 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7669 return None 7670 7671 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7672 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7673 if self._match_text_seq("STATISTICS"): 7674 return self.expression(exp.AnalyzeDelete, kind=kind) 7675 return None 7676 7677 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7678 if self._match_text_seq("CHAINED", "ROWS"): 7679 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7680 return None 7681 7682 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7683 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7684 this = self._prev.text.upper() 7685 expression: t.Optional[exp.Expression] = None 7686 expressions = [] 7687 update_options = None 7688 7689 if self._match_text_seq("HISTOGRAM", "ON"): 7690 expressions = self._parse_csv(self._parse_column_reference) 7691 with_expressions = [] 7692 while self._match(TokenType.WITH): 7693 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7694 if self._match_texts(("SYNC", "ASYNC")): 7695 if self._match_text_seq("MODE", advance=False): 7696 with_expressions.append(f"{self._prev.text.upper()} MODE") 7697 self._advance() 7698 else: 7699 buckets = self._parse_number() 7700 if self._match_text_seq("BUCKETS"): 7701 with_expressions.append(f"{buckets} BUCKETS") 7702 if with_expressions: 7703 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7704 7705 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7706 TokenType.UPDATE, advance=False 7707 ): 7708 update_options = self._prev.text.upper() 7709 self._advance() 7710 elif self._match_text_seq("USING", "DATA"): 7711 expression = self.expression(exp.UsingData, this=self._parse_string()) 7712 7713 return self.expression( 7714 exp.AnalyzeHistogram, 7715 this=this, 7716 expressions=expressions, 7717 expression=expression, 7718 update_options=update_options, 7719 ) 7720 7721 def _parse_merge(self) -> exp.Merge: 7722 self._match(TokenType.INTO) 7723 target = self._parse_table() 7724 7725 if target and self._match(TokenType.ALIAS, advance=False): 7726 target.set("alias", self._parse_table_alias()) 7727 7728 self._match(TokenType.USING) 7729 using = self._parse_table() 7730 7731 self._match(TokenType.ON) 7732 on = self._parse_assignment() 7733 7734 return self.expression( 7735 exp.Merge, 7736 this=target, 7737 using=using, 7738 on=on, 7739 whens=self._parse_when_matched(), 7740 returning=self._parse_returning(), 7741 ) 7742 7743 def _parse_when_matched(self) -> exp.Whens: 7744 whens = [] 7745 7746 while self._match(TokenType.WHEN): 7747 matched = not self._match(TokenType.NOT) 7748 self._match_text_seq("MATCHED") 7749 source = ( 7750 False 7751 if self._match_text_seq("BY", "TARGET") 7752 else self._match_text_seq("BY", "SOURCE") 7753 ) 7754 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7755 7756 self._match(TokenType.THEN) 7757 7758 if self._match(TokenType.INSERT): 7759 this = self._parse_star() 7760 if this: 7761 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7762 else: 7763 then = self.expression( 7764 exp.Insert, 7765 this=exp.var("ROW") 7766 if self._match_text_seq("ROW") 7767 else self._parse_value(values=False), 7768 expression=self._match_text_seq("VALUES") and self._parse_value(), 7769 ) 7770 elif self._match(TokenType.UPDATE): 7771 expressions = self._parse_star() 7772 if expressions: 7773 then = self.expression(exp.Update, expressions=expressions) 7774 else: 7775 then = self.expression( 7776 exp.Update, 7777 expressions=self._match(TokenType.SET) 7778 and self._parse_csv(self._parse_equality), 7779 ) 7780 elif self._match(TokenType.DELETE): 7781 then = self.expression(exp.Var, this=self._prev.text) 7782 else: 7783 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7784 7785 whens.append( 7786 self.expression( 7787 exp.When, 7788 matched=matched, 7789 source=source, 7790 condition=condition, 7791 then=then, 7792 ) 7793 ) 7794 return self.expression(exp.Whens, expressions=whens) 7795 7796 def _parse_show(self) -> t.Optional[exp.Expression]: 7797 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7798 if parser: 7799 return parser(self) 7800 return self._parse_as_command(self._prev) 7801 7802 def _parse_set_item_assignment( 7803 self, kind: t.Optional[str] = None 7804 ) -> t.Optional[exp.Expression]: 7805 index = self._index 7806 7807 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7808 return self._parse_set_transaction(global_=kind == "GLOBAL") 7809 7810 left = self._parse_primary() or self._parse_column() 7811 assignment_delimiter = self._match_texts(("=", "TO")) 7812 7813 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7814 self._retreat(index) 7815 return None 7816 7817 right = self._parse_statement() or self._parse_id_var() 7818 if isinstance(right, (exp.Column, exp.Identifier)): 7819 right = exp.var(right.name) 7820 7821 this = self.expression(exp.EQ, this=left, expression=right) 7822 return self.expression(exp.SetItem, this=this, kind=kind) 7823 7824 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7825 self._match_text_seq("TRANSACTION") 7826 characteristics = self._parse_csv( 7827 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7828 ) 7829 return self.expression( 7830 exp.SetItem, 7831 expressions=characteristics, 7832 kind="TRANSACTION", 7833 **{"global": global_}, # type: ignore 7834 ) 7835 7836 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7837 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7838 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7839 7840 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7841 index = self._index 7842 set_ = self.expression( 7843 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7844 ) 7845 7846 if self._curr: 7847 self._retreat(index) 7848 return self._parse_as_command(self._prev) 7849 7850 return set_ 7851 7852 def _parse_var_from_options( 7853 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7854 ) -> t.Optional[exp.Var]: 7855 start = self._curr 7856 if not start: 7857 return None 7858 7859 option = start.text.upper() 7860 continuations = options.get(option) 7861 7862 index = self._index 7863 self._advance() 7864 for keywords in continuations or []: 7865 if isinstance(keywords, str): 7866 keywords = (keywords,) 7867 7868 if self._match_text_seq(*keywords): 7869 option = f"{option} {' '.join(keywords)}" 7870 break 7871 else: 7872 if continuations or continuations is None: 7873 if raise_unmatched: 7874 self.raise_error(f"Unknown option {option}") 7875 7876 self._retreat(index) 7877 return None 7878 7879 return exp.var(option) 7880 7881 def _parse_as_command(self, start: Token) -> exp.Command: 7882 while self._curr: 7883 self._advance() 7884 text = self._find_sql(start, self._prev) 7885 size = len(start.text) 7886 self._warn_unsupported() 7887 return exp.Command(this=text[:size], expression=text[size:]) 7888 7889 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7890 settings = [] 7891 7892 self._match_l_paren() 7893 kind = self._parse_id_var() 7894 7895 if self._match(TokenType.L_PAREN): 7896 while True: 7897 key = self._parse_id_var() 7898 value = self._parse_primary() 7899 if not key and value is None: 7900 break 7901 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7902 self._match(TokenType.R_PAREN) 7903 7904 self._match_r_paren() 7905 7906 return self.expression( 7907 exp.DictProperty, 7908 this=this, 7909 kind=kind.this if kind else None, 7910 settings=settings, 7911 ) 7912 7913 def _parse_dict_range(self, this: str) -> exp.DictRange: 7914 self._match_l_paren() 7915 has_min = self._match_text_seq("MIN") 7916 if has_min: 7917 min = self._parse_var() or self._parse_primary() 7918 self._match_text_seq("MAX") 7919 max = self._parse_var() or self._parse_primary() 7920 else: 7921 max = self._parse_var() or self._parse_primary() 7922 min = exp.Literal.number(0) 7923 self._match_r_paren() 7924 return self.expression(exp.DictRange, this=this, min=min, max=max) 7925 7926 def _parse_comprehension( 7927 self, this: t.Optional[exp.Expression] 7928 ) -> t.Optional[exp.Comprehension]: 7929 index = self._index 7930 expression = self._parse_column() 7931 if not self._match(TokenType.IN): 7932 self._retreat(index - 1) 7933 return None 7934 iterator = self._parse_column() 7935 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7936 return self.expression( 7937 exp.Comprehension, 7938 this=this, 7939 expression=expression, 7940 iterator=iterator, 7941 condition=condition, 7942 ) 7943 7944 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7945 if self._match(TokenType.HEREDOC_STRING): 7946 return self.expression(exp.Heredoc, this=self._prev.text) 7947 7948 if not self._match_text_seq("$"): 7949 return None 7950 7951 tags = ["$"] 7952 tag_text = None 7953 7954 if self._is_connected(): 7955 self._advance() 7956 tags.append(self._prev.text.upper()) 7957 else: 7958 self.raise_error("No closing $ found") 7959 7960 if tags[-1] != "$": 7961 if self._is_connected() and self._match_text_seq("$"): 7962 tag_text = tags[-1] 7963 tags.append("$") 7964 else: 7965 self.raise_error("No closing $ found") 7966 7967 heredoc_start = self._curr 7968 7969 while self._curr: 7970 if self._match_text_seq(*tags, advance=False): 7971 this = self._find_sql(heredoc_start, self._prev) 7972 self._advance(len(tags)) 7973 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7974 7975 self._advance() 7976 7977 self.raise_error(f"No closing {''.join(tags)} found") 7978 return None 7979 7980 def _find_parser( 7981 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7982 ) -> t.Optional[t.Callable]: 7983 if not self._curr: 7984 return None 7985 7986 index = self._index 7987 this = [] 7988 while True: 7989 # The current token might be multiple words 7990 curr = self._curr.text.upper() 7991 key = curr.split(" ") 7992 this.append(curr) 7993 7994 self._advance() 7995 result, trie = in_trie(trie, key) 7996 if result == TrieResult.FAILED: 7997 break 7998 7999 if result == TrieResult.EXISTS: 8000 subparser = parsers[" ".join(this)] 8001 return subparser 8002 8003 self._retreat(index) 8004 return None 8005 8006 def _match(self, token_type, advance=True, expression=None): 8007 if not self._curr: 8008 return None 8009 8010 if self._curr.token_type == token_type: 8011 if advance: 8012 self._advance() 8013 self._add_comments(expression) 8014 return True 8015 8016 return None 8017 8018 def _match_set(self, types, advance=True): 8019 if not self._curr: 8020 return None 8021 8022 if self._curr.token_type in types: 8023 if advance: 8024 self._advance() 8025 return True 8026 8027 return None 8028 8029 def _match_pair(self, token_type_a, token_type_b, advance=True): 8030 if not self._curr or not self._next: 8031 return None 8032 8033 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8034 if advance: 8035 self._advance(2) 8036 return True 8037 8038 return None 8039 8040 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8041 if not self._match(TokenType.L_PAREN, expression=expression): 8042 self.raise_error("Expecting (") 8043 8044 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8045 if not self._match(TokenType.R_PAREN, expression=expression): 8046 self.raise_error("Expecting )") 8047 8048 def _match_texts(self, texts, advance=True): 8049 if ( 8050 self._curr 8051 and self._curr.token_type != TokenType.STRING 8052 and self._curr.text.upper() in texts 8053 ): 8054 if advance: 8055 self._advance() 8056 return True 8057 return None 8058 8059 def _match_text_seq(self, *texts, advance=True): 8060 index = self._index 8061 for text in texts: 8062 if ( 8063 self._curr 8064 and self._curr.token_type != TokenType.STRING 8065 and self._curr.text.upper() == text 8066 ): 8067 self._advance() 8068 else: 8069 self._retreat(index) 8070 return None 8071 8072 if not advance: 8073 self._retreat(index) 8074 8075 return True 8076 8077 def _replace_lambda( 8078 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8079 ) -> t.Optional[exp.Expression]: 8080 if not node: 8081 return node 8082 8083 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8084 8085 for column in node.find_all(exp.Column): 8086 typ = lambda_types.get(column.parts[0].name) 8087 if typ is not None: 8088 dot_or_id = column.to_dot() if column.table else column.this 8089 8090 if typ: 8091 dot_or_id = self.expression( 8092 exp.Cast, 8093 this=dot_or_id, 8094 to=typ, 8095 ) 8096 8097 parent = column.parent 8098 8099 while isinstance(parent, exp.Dot): 8100 if not isinstance(parent.parent, exp.Dot): 8101 parent.replace(dot_or_id) 8102 break 8103 parent = parent.parent 8104 else: 8105 if column is node: 8106 node = dot_or_id 8107 else: 8108 column.replace(dot_or_id) 8109 return node 8110 8111 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8112 start = self._prev 8113 8114 # Not to be confused with TRUNCATE(number, decimals) function call 8115 if self._match(TokenType.L_PAREN): 8116 self._retreat(self._index - 2) 8117 return self._parse_function() 8118 8119 # Clickhouse supports TRUNCATE DATABASE as well 8120 is_database = self._match(TokenType.DATABASE) 8121 8122 self._match(TokenType.TABLE) 8123 8124 exists = self._parse_exists(not_=False) 8125 8126 expressions = self._parse_csv( 8127 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8128 ) 8129 8130 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8131 8132 if self._match_text_seq("RESTART", "IDENTITY"): 8133 identity = "RESTART" 8134 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8135 identity = "CONTINUE" 8136 else: 8137 identity = None 8138 8139 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8140 option = self._prev.text 8141 else: 8142 option = None 8143 8144 partition = self._parse_partition() 8145 8146 # Fallback case 8147 if self._curr: 8148 return self._parse_as_command(start) 8149 8150 return self.expression( 8151 exp.TruncateTable, 8152 expressions=expressions, 8153 is_database=is_database, 8154 exists=exists, 8155 cluster=cluster, 8156 identity=identity, 8157 option=option, 8158 partition=partition, 8159 ) 8160 8161 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8162 this = self._parse_ordered(self._parse_opclass) 8163 8164 if not self._match(TokenType.WITH): 8165 return this 8166 8167 op = self._parse_var(any_token=True) 8168 8169 return self.expression(exp.WithOperator, this=this, op=op) 8170 8171 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8172 self._match(TokenType.EQ) 8173 self._match(TokenType.L_PAREN) 8174 8175 opts: t.List[t.Optional[exp.Expression]] = [] 8176 option: exp.Expression | None 8177 while self._curr and not self._match(TokenType.R_PAREN): 8178 if self._match_text_seq("FORMAT_NAME", "="): 8179 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8180 option = self._parse_format_name() 8181 else: 8182 option = self._parse_property() 8183 8184 if option is None: 8185 self.raise_error("Unable to parse option") 8186 break 8187 8188 opts.append(option) 8189 8190 return opts 8191 8192 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8193 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8194 8195 options = [] 8196 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8197 option = self._parse_var(any_token=True) 8198 prev = self._prev.text.upper() 8199 8200 # Different dialects might separate options and values by white space, "=" and "AS" 8201 self._match(TokenType.EQ) 8202 self._match(TokenType.ALIAS) 8203 8204 param = self.expression(exp.CopyParameter, this=option) 8205 8206 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8207 TokenType.L_PAREN, advance=False 8208 ): 8209 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8210 param.set("expressions", self._parse_wrapped_options()) 8211 elif prev == "FILE_FORMAT": 8212 # T-SQL's external file format case 8213 param.set("expression", self._parse_field()) 8214 else: 8215 param.set("expression", self._parse_unquoted_field()) 8216 8217 options.append(param) 8218 self._match(sep) 8219 8220 return options 8221 8222 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8223 expr = self.expression(exp.Credentials) 8224 8225 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8226 expr.set("storage", self._parse_field()) 8227 if self._match_text_seq("CREDENTIALS"): 8228 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8229 creds = ( 8230 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8231 ) 8232 expr.set("credentials", creds) 8233 if self._match_text_seq("ENCRYPTION"): 8234 expr.set("encryption", self._parse_wrapped_options()) 8235 if self._match_text_seq("IAM_ROLE"): 8236 expr.set("iam_role", self._parse_field()) 8237 if self._match_text_seq("REGION"): 8238 expr.set("region", self._parse_field()) 8239 8240 return expr 8241 8242 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8243 return self._parse_field() 8244 8245 def _parse_copy(self) -> exp.Copy | exp.Command: 8246 start = self._prev 8247 8248 self._match(TokenType.INTO) 8249 8250 this = ( 8251 self._parse_select(nested=True, parse_subquery_alias=False) 8252 if self._match(TokenType.L_PAREN, advance=False) 8253 else self._parse_table(schema=True) 8254 ) 8255 8256 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8257 8258 files = self._parse_csv(self._parse_file_location) 8259 credentials = self._parse_credentials() 8260 8261 self._match_text_seq("WITH") 8262 8263 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8264 8265 # Fallback case 8266 if self._curr: 8267 return self._parse_as_command(start) 8268 8269 return self.expression( 8270 exp.Copy, 8271 this=this, 8272 kind=kind, 8273 credentials=credentials, 8274 files=files, 8275 params=params, 8276 ) 8277 8278 def _parse_normalize(self) -> exp.Normalize: 8279 return self.expression( 8280 exp.Normalize, 8281 this=self._parse_bitwise(), 8282 form=self._match(TokenType.COMMA) and self._parse_var(), 8283 ) 8284 8285 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8286 args = self._parse_csv(lambda: self._parse_lambda()) 8287 8288 this = seq_get(args, 0) 8289 decimals = seq_get(args, 1) 8290 8291 return expr_type( 8292 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8293 ) 8294 8295 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8296 star_token = self._prev 8297 8298 if self._match_text_seq("COLUMNS", "(", advance=False): 8299 this = self._parse_function() 8300 if isinstance(this, exp.Columns): 8301 this.set("unpack", True) 8302 return this 8303 8304 return self.expression( 8305 exp.Star, 8306 **{ # type: ignore 8307 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8308 "replace": self._parse_star_op("REPLACE"), 8309 "rename": self._parse_star_op("RENAME"), 8310 }, 8311 ).update_positions(star_token) 8312 8313 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8314 privilege_parts = [] 8315 8316 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8317 # (end of privilege list) or L_PAREN (start of column list) are met 8318 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8319 privilege_parts.append(self._curr.text.upper()) 8320 self._advance() 8321 8322 this = exp.var(" ".join(privilege_parts)) 8323 expressions = ( 8324 self._parse_wrapped_csv(self._parse_column) 8325 if self._match(TokenType.L_PAREN, advance=False) 8326 else None 8327 ) 8328 8329 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8330 8331 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8332 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8333 principal = self._parse_id_var() 8334 8335 if not principal: 8336 return None 8337 8338 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8339 8340 def _parse_grant(self) -> exp.Grant | exp.Command: 8341 start = self._prev 8342 8343 privileges = self._parse_csv(self._parse_grant_privilege) 8344 8345 self._match(TokenType.ON) 8346 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8347 8348 # Attempt to parse the securable e.g. MySQL allows names 8349 # such as "foo.*", "*.*" which are not easily parseable yet 8350 securable = self._try_parse(self._parse_table_parts) 8351 8352 if not securable or not self._match_text_seq("TO"): 8353 return self._parse_as_command(start) 8354 8355 principals = self._parse_csv(self._parse_grant_principal) 8356 8357 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8358 8359 if self._curr: 8360 return self._parse_as_command(start) 8361 8362 return self.expression( 8363 exp.Grant, 8364 privileges=privileges, 8365 kind=kind, 8366 securable=securable, 8367 principals=principals, 8368 grant_option=grant_option, 8369 ) 8370 8371 def _parse_overlay(self) -> exp.Overlay: 8372 return self.expression( 8373 exp.Overlay, 8374 **{ # type: ignore 8375 "this": self._parse_bitwise(), 8376 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8377 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8378 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8379 }, 8380 ) 8381 8382 def _parse_format_name(self) -> exp.Property: 8383 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8384 # for FILE_FORMAT = <format_name> 8385 return self.expression( 8386 exp.Property, 8387 this=exp.var("FORMAT_NAME"), 8388 value=self._parse_string() or self._parse_table_parts(), 8389 ) 8390 8391 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8392 args: t.List[exp.Expression] = [] 8393 8394 if self._match(TokenType.DISTINCT): 8395 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8396 self._match(TokenType.COMMA) 8397 8398 args.extend(self._parse_csv(self._parse_assignment)) 8399 8400 return self.expression( 8401 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8402 ) 8403 8404 def _identifier_expression( 8405 self, token: t.Optional[Token] = None, **kwargs: t.Any 8406 ) -> exp.Identifier: 8407 token = token or self._prev 8408 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8409 expression.update_positions(token) 8410 return expression 8411 8412 def _build_pipe_cte( 8413 self, 8414 query: exp.Query, 8415 expressions: t.List[exp.Expression], 8416 alias_cte: t.Optional[exp.TableAlias] = None, 8417 ) -> exp.Select: 8418 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8419 if alias_cte: 8420 new_cte = alias_cte 8421 else: 8422 self._pipe_cte_counter += 1 8423 new_cte = f"__tmp{self._pipe_cte_counter}" 8424 8425 with_ = query.args.get("with") 8426 ctes = with_.pop() if with_ else None 8427 8428 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8429 if ctes: 8430 new_select.set("with", ctes) 8431 8432 return new_select.with_(new_cte, as_=query, copy=False) 8433 8434 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8435 select = self._parse_select(consume_pipe=False) 8436 if not select: 8437 return query 8438 8439 return self._build_pipe_cte( 8440 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8441 ) 8442 8443 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8444 limit = self._parse_limit() 8445 offset = self._parse_offset() 8446 if limit: 8447 curr_limit = query.args.get("limit", limit) 8448 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8449 query.limit(limit, copy=False) 8450 if offset: 8451 curr_offset = query.args.get("offset") 8452 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8453 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8454 8455 return query 8456 8457 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8458 this = self._parse_assignment() 8459 if self._match_text_seq("GROUP", "AND", advance=False): 8460 return this 8461 8462 this = self._parse_alias(this) 8463 8464 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8465 return self._parse_ordered(lambda: this) 8466 8467 return this 8468 8469 def _parse_pipe_syntax_aggregate_group_order_by( 8470 self, query: exp.Select, group_by_exists: bool = True 8471 ) -> exp.Select: 8472 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8473 aggregates_or_groups, orders = [], [] 8474 for element in expr: 8475 if isinstance(element, exp.Ordered): 8476 this = element.this 8477 if isinstance(this, exp.Alias): 8478 element.set("this", this.args["alias"]) 8479 orders.append(element) 8480 else: 8481 this = element 8482 aggregates_or_groups.append(this) 8483 8484 if group_by_exists: 8485 query.select(*aggregates_or_groups, copy=False).group_by( 8486 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8487 copy=False, 8488 ) 8489 else: 8490 query.select(*aggregates_or_groups, append=False, copy=False) 8491 8492 if orders: 8493 return query.order_by(*orders, append=False, copy=False) 8494 8495 return query 8496 8497 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8498 self._match_text_seq("AGGREGATE") 8499 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8500 8501 if self._match(TokenType.GROUP_BY) or ( 8502 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8503 ): 8504 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8505 8506 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8507 8508 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8509 first_setop = self.parse_set_operation(this=query) 8510 if not first_setop: 8511 return None 8512 8513 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8514 expr = self._parse_paren() 8515 return expr.assert_is(exp.Subquery).unnest() if expr else None 8516 8517 first_setop.this.pop() 8518 8519 setops = [ 8520 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8521 *self._parse_csv(_parse_and_unwrap_query), 8522 ] 8523 8524 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8525 with_ = query.args.get("with") 8526 ctes = with_.pop() if with_ else None 8527 8528 if isinstance(first_setop, exp.Union): 8529 query = query.union(*setops, copy=False, **first_setop.args) 8530 elif isinstance(first_setop, exp.Except): 8531 query = query.except_(*setops, copy=False, **first_setop.args) 8532 else: 8533 query = query.intersect(*setops, copy=False, **first_setop.args) 8534 8535 query.set("with", ctes) 8536 8537 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8538 8539 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8540 join = self._parse_join() 8541 if not join: 8542 return None 8543 8544 if isinstance(query, exp.Select): 8545 return query.join(join, copy=False) 8546 8547 return query 8548 8549 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8550 pivots = self._parse_pivots() 8551 if not pivots: 8552 return query 8553 8554 from_ = query.args.get("from") 8555 if from_: 8556 from_.this.set("pivots", pivots) 8557 8558 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8559 8560 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8561 self._match_text_seq("EXTEND") 8562 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8563 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8564 8565 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8566 sample = self._parse_table_sample() 8567 8568 with_ = query.args.get("with") 8569 if with_: 8570 with_.expressions[-1].this.set("sample", sample) 8571 else: 8572 query.set("sample", sample) 8573 8574 return query 8575 8576 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8577 if isinstance(query, exp.Subquery): 8578 query = exp.select("*").from_(query, copy=False) 8579 8580 if not query.args.get("from"): 8581 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8582 8583 while self._match(TokenType.PIPE_GT): 8584 start = self._curr 8585 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8586 if not parser: 8587 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8588 # keywords, making it tricky to disambiguate them without lookahead. The approach 8589 # here is to try and parse a set operation and if that fails, then try to parse a 8590 # join operator. If that fails as well, then the operator is not supported. 8591 parsed_query = self._parse_pipe_syntax_set_operator(query) 8592 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8593 if not parsed_query: 8594 self._retreat(start) 8595 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8596 break 8597 query = parsed_query 8598 else: 8599 query = parser(self, query) 8600 8601 return query
32def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 33 if len(args) == 1 and args[0].is_star: 34 return exp.StarMap(this=args[0]) 35 36 keys = [] 37 values = [] 38 for i in range(0, len(args), 2): 39 keys.append(args[i]) 40 values.append(args[i + 1]) 41 42 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
50def binary_range_parser( 51 expr_type: t.Type[exp.Expression], reverse_args: bool = False 52) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 53 def _parse_binary_range( 54 self: Parser, this: t.Optional[exp.Expression] 55 ) -> t.Optional[exp.Expression]: 56 expression = self._parse_bitwise() 57 if reverse_args: 58 this, expression = expression, this 59 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 60 61 return _parse_binary_range
64def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 65 # Default argument order is base, expression 66 this = seq_get(args, 0) 67 expression = seq_get(args, 1) 68 69 if expression: 70 if not dialect.LOG_BASE_FIRST: 71 this, expression = expression, this 72 return exp.Log(this=this, expression=expression) 73 74 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
94def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 95 def _builder(args: t.List, dialect: Dialect) -> E: 96 expression = expr_type( 97 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 98 ) 99 if len(args) > 2 and expr_type is exp.JSONExtract: 100 expression.set("expressions", args[2:]) 101 102 return expression 103 104 return _builder
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEQUENCE, 456 TokenType.SINK, 457 TokenType.SOURCE, 458 TokenType.STAGE, 459 TokenType.STORAGE_INTEGRATION, 460 TokenType.STREAMLIT, 461 TokenType.TABLE, 462 TokenType.TAG, 463 TokenType.VIEW, 464 TokenType.WAREHOUSE, 465 } 466 467 CREATABLES = { 468 TokenType.COLUMN, 469 TokenType.CONSTRAINT, 470 TokenType.FOREIGN_KEY, 471 TokenType.FUNCTION, 472 TokenType.INDEX, 473 TokenType.PROCEDURE, 474 *DB_CREATABLES, 475 } 476 477 ALTERABLES = { 478 TokenType.INDEX, 479 TokenType.TABLE, 480 TokenType.VIEW, 481 } 482 483 # Tokens that can represent identifiers 484 ID_VAR_TOKENS = { 485 TokenType.ALL, 486 TokenType.ATTACH, 487 TokenType.VAR, 488 TokenType.ANTI, 489 TokenType.APPLY, 490 TokenType.ASC, 491 TokenType.ASOF, 492 TokenType.AUTO_INCREMENT, 493 TokenType.BEGIN, 494 TokenType.BPCHAR, 495 TokenType.CACHE, 496 TokenType.CASE, 497 TokenType.COLLATE, 498 TokenType.COMMAND, 499 TokenType.COMMENT, 500 TokenType.COMMIT, 501 TokenType.CONSTRAINT, 502 TokenType.COPY, 503 TokenType.CUBE, 504 TokenType.CURRENT_SCHEMA, 505 TokenType.DEFAULT, 506 TokenType.DELETE, 507 TokenType.DESC, 508 TokenType.DESCRIBE, 509 TokenType.DETACH, 510 TokenType.DICTIONARY, 511 TokenType.DIV, 512 TokenType.END, 513 TokenType.EXECUTE, 514 TokenType.EXPORT, 515 TokenType.ESCAPE, 516 TokenType.FALSE, 517 TokenType.FIRST, 518 TokenType.FILTER, 519 TokenType.FINAL, 520 TokenType.FORMAT, 521 TokenType.FULL, 522 TokenType.GET, 523 TokenType.IDENTIFIER, 524 TokenType.IS, 525 TokenType.ISNULL, 526 TokenType.INTERVAL, 527 TokenType.KEEP, 528 TokenType.KILL, 529 TokenType.LEFT, 530 TokenType.LIMIT, 531 TokenType.LOAD, 532 TokenType.MERGE, 533 TokenType.NATURAL, 534 TokenType.NEXT, 535 TokenType.OFFSET, 536 TokenType.OPERATOR, 537 TokenType.ORDINALITY, 538 TokenType.OVERLAPS, 539 TokenType.OVERWRITE, 540 TokenType.PARTITION, 541 TokenType.PERCENT, 542 TokenType.PIVOT, 543 TokenType.PRAGMA, 544 TokenType.PUT, 545 TokenType.RANGE, 546 TokenType.RECURSIVE, 547 TokenType.REFERENCES, 548 TokenType.REFRESH, 549 TokenType.RENAME, 550 TokenType.REPLACE, 551 TokenType.RIGHT, 552 TokenType.ROLLUP, 553 TokenType.ROW, 554 TokenType.ROWS, 555 TokenType.SEMI, 556 TokenType.SET, 557 TokenType.SETTINGS, 558 TokenType.SHOW, 559 TokenType.TEMPORARY, 560 TokenType.TOP, 561 TokenType.TRUE, 562 TokenType.TRUNCATE, 563 TokenType.UNIQUE, 564 TokenType.UNNEST, 565 TokenType.UNPIVOT, 566 TokenType.UPDATE, 567 TokenType.USE, 568 TokenType.VOLATILE, 569 TokenType.WINDOW, 570 *CREATABLES, 571 *SUBQUERY_PREDICATES, 572 *TYPE_TOKENS, 573 *NO_PAREN_FUNCTIONS, 574 } 575 ID_VAR_TOKENS.remove(TokenType.UNION) 576 577 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 578 TokenType.ANTI, 579 TokenType.APPLY, 580 TokenType.ASOF, 581 TokenType.FULL, 582 TokenType.LEFT, 583 TokenType.LOCK, 584 TokenType.NATURAL, 585 TokenType.RIGHT, 586 TokenType.SEMI, 587 TokenType.WINDOW, 588 } 589 590 ALIAS_TOKENS = ID_VAR_TOKENS 591 592 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 593 594 ARRAY_CONSTRUCTORS = { 595 "ARRAY": exp.Array, 596 "LIST": exp.List, 597 } 598 599 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 600 601 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 602 603 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 604 605 FUNC_TOKENS = { 606 TokenType.COLLATE, 607 TokenType.COMMAND, 608 TokenType.CURRENT_DATE, 609 TokenType.CURRENT_DATETIME, 610 TokenType.CURRENT_SCHEMA, 611 TokenType.CURRENT_TIMESTAMP, 612 TokenType.CURRENT_TIME, 613 TokenType.CURRENT_USER, 614 TokenType.FILTER, 615 TokenType.FIRST, 616 TokenType.FORMAT, 617 TokenType.GET, 618 TokenType.GLOB, 619 TokenType.IDENTIFIER, 620 TokenType.INDEX, 621 TokenType.ISNULL, 622 TokenType.ILIKE, 623 TokenType.INSERT, 624 TokenType.LIKE, 625 TokenType.MERGE, 626 TokenType.NEXT, 627 TokenType.OFFSET, 628 TokenType.PRIMARY_KEY, 629 TokenType.RANGE, 630 TokenType.REPLACE, 631 TokenType.RLIKE, 632 TokenType.ROW, 633 TokenType.UNNEST, 634 TokenType.VAR, 635 TokenType.LEFT, 636 TokenType.RIGHT, 637 TokenType.SEQUENCE, 638 TokenType.DATE, 639 TokenType.DATETIME, 640 TokenType.TABLE, 641 TokenType.TIMESTAMP, 642 TokenType.TIMESTAMPTZ, 643 TokenType.TRUNCATE, 644 TokenType.WINDOW, 645 TokenType.XOR, 646 *TYPE_TOKENS, 647 *SUBQUERY_PREDICATES, 648 } 649 650 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.AND: exp.And, 652 } 653 654 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.COLON_EQ: exp.PropertyEQ, 656 } 657 658 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 659 TokenType.OR: exp.Or, 660 } 661 662 EQUALITY = { 663 TokenType.EQ: exp.EQ, 664 TokenType.NEQ: exp.NEQ, 665 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 666 } 667 668 COMPARISON = { 669 TokenType.GT: exp.GT, 670 TokenType.GTE: exp.GTE, 671 TokenType.LT: exp.LT, 672 TokenType.LTE: exp.LTE, 673 } 674 675 BITWISE = { 676 TokenType.AMP: exp.BitwiseAnd, 677 TokenType.CARET: exp.BitwiseXor, 678 TokenType.PIPE: exp.BitwiseOr, 679 } 680 681 TERM = { 682 TokenType.DASH: exp.Sub, 683 TokenType.PLUS: exp.Add, 684 TokenType.MOD: exp.Mod, 685 TokenType.COLLATE: exp.Collate, 686 } 687 688 FACTOR = { 689 TokenType.DIV: exp.IntDiv, 690 TokenType.LR_ARROW: exp.Distance, 691 TokenType.SLASH: exp.Div, 692 TokenType.STAR: exp.Mul, 693 } 694 695 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 696 697 TIMES = { 698 TokenType.TIME, 699 TokenType.TIMETZ, 700 } 701 702 TIMESTAMPS = { 703 TokenType.TIMESTAMP, 704 TokenType.TIMESTAMPNTZ, 705 TokenType.TIMESTAMPTZ, 706 TokenType.TIMESTAMPLTZ, 707 *TIMES, 708 } 709 710 SET_OPERATIONS = { 711 TokenType.UNION, 712 TokenType.INTERSECT, 713 TokenType.EXCEPT, 714 } 715 716 JOIN_METHODS = { 717 TokenType.ASOF, 718 TokenType.NATURAL, 719 TokenType.POSITIONAL, 720 } 721 722 JOIN_SIDES = { 723 TokenType.LEFT, 724 TokenType.RIGHT, 725 TokenType.FULL, 726 } 727 728 JOIN_KINDS = { 729 TokenType.ANTI, 730 TokenType.CROSS, 731 TokenType.INNER, 732 TokenType.OUTER, 733 TokenType.SEMI, 734 TokenType.STRAIGHT_JOIN, 735 } 736 737 JOIN_HINTS: t.Set[str] = set() 738 739 LAMBDAS = { 740 TokenType.ARROW: lambda self, expressions: self.expression( 741 exp.Lambda, 742 this=self._replace_lambda( 743 self._parse_assignment(), 744 expressions, 745 ), 746 expressions=expressions, 747 ), 748 TokenType.FARROW: lambda self, expressions: self.expression( 749 exp.Kwarg, 750 this=exp.var(expressions[0].name), 751 expression=self._parse_assignment(), 752 ), 753 } 754 755 COLUMN_OPERATORS = { 756 TokenType.DOT: None, 757 TokenType.DOTCOLON: lambda self, this, to: self.expression( 758 exp.JSONCast, 759 this=this, 760 to=to, 761 ), 762 TokenType.DCOLON: lambda self, this, to: self.expression( 763 exp.Cast if self.STRICT_CAST else exp.TryCast, 764 this=this, 765 to=to, 766 ), 767 TokenType.ARROW: lambda self, this, path: self.expression( 768 exp.JSONExtract, 769 this=this, 770 expression=self.dialect.to_json_path(path), 771 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 772 ), 773 TokenType.DARROW: lambda self, this, path: self.expression( 774 exp.JSONExtractScalar, 775 this=this, 776 expression=self.dialect.to_json_path(path), 777 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 778 ), 779 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtract, 781 this=this, 782 expression=path, 783 ), 784 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtractScalar, 786 this=this, 787 expression=path, 788 ), 789 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 790 exp.JSONBContains, 791 this=this, 792 expression=key, 793 ), 794 } 795 796 EXPRESSION_PARSERS = { 797 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 798 exp.Column: lambda self: self._parse_column(), 799 exp.Condition: lambda self: self._parse_assignment(), 800 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 801 exp.Expression: lambda self: self._parse_expression(), 802 exp.From: lambda self: self._parse_from(joins=True), 803 exp.Group: lambda self: self._parse_group(), 804 exp.Having: lambda self: self._parse_having(), 805 exp.Hint: lambda self: self._parse_hint_body(), 806 exp.Identifier: lambda self: self._parse_id_var(), 807 exp.Join: lambda self: self._parse_join(), 808 exp.Lambda: lambda self: self._parse_lambda(), 809 exp.Lateral: lambda self: self._parse_lateral(), 810 exp.Limit: lambda self: self._parse_limit(), 811 exp.Offset: lambda self: self._parse_offset(), 812 exp.Order: lambda self: self._parse_order(), 813 exp.Ordered: lambda self: self._parse_ordered(), 814 exp.Properties: lambda self: self._parse_properties(), 815 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 816 exp.Qualify: lambda self: self._parse_qualify(), 817 exp.Returning: lambda self: self._parse_returning(), 818 exp.Select: lambda self: self._parse_select(), 819 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 820 exp.Table: lambda self: self._parse_table_parts(), 821 exp.TableAlias: lambda self: self._parse_table_alias(), 822 exp.Tuple: lambda self: self._parse_value(values=False), 823 exp.Whens: lambda self: self._parse_when_matched(), 824 exp.Where: lambda self: self._parse_where(), 825 exp.Window: lambda self: self._parse_named_window(), 826 exp.With: lambda self: self._parse_with(), 827 "JOIN_TYPE": lambda self: self._parse_join_parts(), 828 } 829 830 STATEMENT_PARSERS = { 831 TokenType.ALTER: lambda self: self._parse_alter(), 832 TokenType.ANALYZE: lambda self: self._parse_analyze(), 833 TokenType.BEGIN: lambda self: self._parse_transaction(), 834 TokenType.CACHE: lambda self: self._parse_cache(), 835 TokenType.COMMENT: lambda self: self._parse_comment(), 836 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 837 TokenType.COPY: lambda self: self._parse_copy(), 838 TokenType.CREATE: lambda self: self._parse_create(), 839 TokenType.DELETE: lambda self: self._parse_delete(), 840 TokenType.DESC: lambda self: self._parse_describe(), 841 TokenType.DESCRIBE: lambda self: self._parse_describe(), 842 TokenType.DROP: lambda self: self._parse_drop(), 843 TokenType.GRANT: lambda self: self._parse_grant(), 844 TokenType.INSERT: lambda self: self._parse_insert(), 845 TokenType.KILL: lambda self: self._parse_kill(), 846 TokenType.LOAD: lambda self: self._parse_load(), 847 TokenType.MERGE: lambda self: self._parse_merge(), 848 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 849 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 850 TokenType.REFRESH: lambda self: self._parse_refresh(), 851 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 852 TokenType.SET: lambda self: self._parse_set(), 853 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 854 TokenType.UNCACHE: lambda self: self._parse_uncache(), 855 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 856 TokenType.UPDATE: lambda self: self._parse_update(), 857 TokenType.USE: lambda self: self._parse_use(), 858 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 859 } 860 861 UNARY_PARSERS = { 862 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 863 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 864 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 865 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 866 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 867 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 868 } 869 870 STRING_PARSERS = { 871 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 872 exp.RawString, this=token.text 873 ), 874 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 875 exp.National, this=token.text 876 ), 877 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 878 TokenType.STRING: lambda self, token: self.expression( 879 exp.Literal, this=token.text, is_string=True 880 ), 881 TokenType.UNICODE_STRING: lambda self, token: self.expression( 882 exp.UnicodeString, 883 this=token.text, 884 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 885 ), 886 } 887 888 NUMERIC_PARSERS = { 889 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 890 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 891 TokenType.HEX_STRING: lambda self, token: self.expression( 892 exp.HexString, 893 this=token.text, 894 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 895 ), 896 TokenType.NUMBER: lambda self, token: self.expression( 897 exp.Literal, this=token.text, is_string=False 898 ), 899 } 900 901 PRIMARY_PARSERS = { 902 **STRING_PARSERS, 903 **NUMERIC_PARSERS, 904 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 905 TokenType.NULL: lambda self, _: self.expression(exp.Null), 906 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 907 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 908 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 909 TokenType.STAR: lambda self, _: self._parse_star_ops(), 910 } 911 912 PLACEHOLDER_PARSERS = { 913 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 914 TokenType.PARAMETER: lambda self: self._parse_parameter(), 915 TokenType.COLON: lambda self: ( 916 self.expression(exp.Placeholder, this=self._prev.text) 917 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 918 else None 919 ), 920 } 921 922 RANGE_PARSERS = { 923 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 924 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 925 TokenType.GLOB: binary_range_parser(exp.Glob), 926 TokenType.ILIKE: binary_range_parser(exp.ILike), 927 TokenType.IN: lambda self, this: self._parse_in(this), 928 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 929 TokenType.IS: lambda self, this: self._parse_is(this), 930 TokenType.LIKE: binary_range_parser(exp.Like), 931 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 932 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 933 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 934 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 935 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 936 } 937 938 PIPE_SYNTAX_TRANSFORM_PARSERS = { 939 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 940 "AS": lambda self, query: self._build_pipe_cte( 941 query, [exp.Star()], self._parse_table_alias() 942 ), 943 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 944 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 945 "ORDER BY": lambda self, query: query.order_by( 946 self._parse_order(), append=False, copy=False 947 ), 948 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 949 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 950 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 951 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 952 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 953 } 954 955 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 956 "ALLOWED_VALUES": lambda self: self.expression( 957 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 958 ), 959 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 960 "AUTO": lambda self: self._parse_auto_property(), 961 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 962 "BACKUP": lambda self: self.expression( 963 exp.BackupProperty, this=self._parse_var(any_token=True) 964 ), 965 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 966 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 967 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 968 "CHECKSUM": lambda self: self._parse_checksum(), 969 "CLUSTER BY": lambda self: self._parse_cluster(), 970 "CLUSTERED": lambda self: self._parse_clustered_by(), 971 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 972 exp.CollateProperty, **kwargs 973 ), 974 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 975 "CONTAINS": lambda self: self._parse_contains_property(), 976 "COPY": lambda self: self._parse_copy_property(), 977 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 978 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 979 "DEFINER": lambda self: self._parse_definer(), 980 "DETERMINISTIC": lambda self: self.expression( 981 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 982 ), 983 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 984 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 985 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 986 "DISTKEY": lambda self: self._parse_distkey(), 987 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 988 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 989 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 990 "ENVIRONMENT": lambda self: self.expression( 991 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 992 ), 993 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 994 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 995 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 996 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 997 "FREESPACE": lambda self: self._parse_freespace(), 998 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 999 "HEAP": lambda self: self.expression(exp.HeapProperty), 1000 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1001 "IMMUTABLE": lambda self: self.expression( 1002 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1003 ), 1004 "INHERITS": lambda self: self.expression( 1005 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1006 ), 1007 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1008 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1009 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1010 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1011 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1012 "LIKE": lambda self: self._parse_create_like(), 1013 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1014 "LOCK": lambda self: self._parse_locking(), 1015 "LOCKING": lambda self: self._parse_locking(), 1016 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1017 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1018 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1019 "MODIFIES": lambda self: self._parse_modifies_property(), 1020 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1021 "NO": lambda self: self._parse_no_property(), 1022 "ON": lambda self: self._parse_on_property(), 1023 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1024 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1025 "PARTITION": lambda self: self._parse_partitioned_of(), 1026 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1027 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1028 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1029 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1030 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1031 "READS": lambda self: self._parse_reads_property(), 1032 "REMOTE": lambda self: self._parse_remote_with_connection(), 1033 "RETURNS": lambda self: self._parse_returns(), 1034 "STRICT": lambda self: self.expression(exp.StrictProperty), 1035 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1036 "ROW": lambda self: self._parse_row(), 1037 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1038 "SAMPLE": lambda self: self.expression( 1039 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1040 ), 1041 "SECURE": lambda self: self.expression(exp.SecureProperty), 1042 "SECURITY": lambda self: self._parse_security(), 1043 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1044 "SETTINGS": lambda self: self._parse_settings_property(), 1045 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1046 "SORTKEY": lambda self: self._parse_sortkey(), 1047 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1048 "STABLE": lambda self: self.expression( 1049 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1050 ), 1051 "STORED": lambda self: self._parse_stored(), 1052 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1053 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1054 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1055 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1056 "TO": lambda self: self._parse_to_table(), 1057 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1058 "TRANSFORM": lambda self: self.expression( 1059 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1060 ), 1061 "TTL": lambda self: self._parse_ttl(), 1062 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1063 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1064 "VOLATILE": lambda self: self._parse_volatile_property(), 1065 "WITH": lambda self: self._parse_with_property(), 1066 } 1067 1068 CONSTRAINT_PARSERS = { 1069 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1070 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1071 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1072 "CHARACTER SET": lambda self: self.expression( 1073 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1074 ), 1075 "CHECK": lambda self: self.expression( 1076 exp.CheckColumnConstraint, 1077 this=self._parse_wrapped(self._parse_assignment), 1078 enforced=self._match_text_seq("ENFORCED"), 1079 ), 1080 "COLLATE": lambda self: self.expression( 1081 exp.CollateColumnConstraint, 1082 this=self._parse_identifier() or self._parse_column(), 1083 ), 1084 "COMMENT": lambda self: self.expression( 1085 exp.CommentColumnConstraint, this=self._parse_string() 1086 ), 1087 "COMPRESS": lambda self: self._parse_compress(), 1088 "CLUSTERED": lambda self: self.expression( 1089 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1090 ), 1091 "NONCLUSTERED": lambda self: self.expression( 1092 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1093 ), 1094 "DEFAULT": lambda self: self.expression( 1095 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1096 ), 1097 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1098 "EPHEMERAL": lambda self: self.expression( 1099 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1100 ), 1101 "EXCLUDE": lambda self: self.expression( 1102 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1103 ), 1104 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1105 "FORMAT": lambda self: self.expression( 1106 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1107 ), 1108 "GENERATED": lambda self: self._parse_generated_as_identity(), 1109 "IDENTITY": lambda self: self._parse_auto_increment(), 1110 "INLINE": lambda self: self._parse_inline(), 1111 "LIKE": lambda self: self._parse_create_like(), 1112 "NOT": lambda self: self._parse_not_constraint(), 1113 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1114 "ON": lambda self: ( 1115 self._match(TokenType.UPDATE) 1116 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1117 ) 1118 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1119 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1120 "PERIOD": lambda self: self._parse_period_for_system_time(), 1121 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1122 "REFERENCES": lambda self: self._parse_references(match=False), 1123 "TITLE": lambda self: self.expression( 1124 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1125 ), 1126 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1127 "UNIQUE": lambda self: self._parse_unique(), 1128 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1129 "WATERMARK": lambda self: self.expression( 1130 exp.WatermarkColumnConstraint, 1131 this=self._match(TokenType.FOR) and self._parse_column(), 1132 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1133 ), 1134 "WITH": lambda self: self.expression( 1135 exp.Properties, expressions=self._parse_wrapped_properties() 1136 ), 1137 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1138 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1139 } 1140 1141 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1142 klass = ( 1143 exp.PartitionedByBucket 1144 if self._prev.text.upper() == "BUCKET" 1145 else exp.PartitionByTruncate 1146 ) 1147 1148 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1149 this, expression = seq_get(args, 0), seq_get(args, 1) 1150 1151 if isinstance(this, exp.Literal): 1152 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1153 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1154 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1155 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1156 # 1157 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1158 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1159 this, expression = expression, this 1160 1161 return self.expression(klass, this=this, expression=expression) 1162 1163 ALTER_PARSERS = { 1164 "ADD": lambda self: self._parse_alter_table_add(), 1165 "AS": lambda self: self._parse_select(), 1166 "ALTER": lambda self: self._parse_alter_table_alter(), 1167 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1168 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1169 "DROP": lambda self: self._parse_alter_table_drop(), 1170 "RENAME": lambda self: self._parse_alter_table_rename(), 1171 "SET": lambda self: self._parse_alter_table_set(), 1172 "SWAP": lambda self: self.expression( 1173 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1174 ), 1175 } 1176 1177 ALTER_ALTER_PARSERS = { 1178 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1179 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1180 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1181 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1182 } 1183 1184 SCHEMA_UNNAMED_CONSTRAINTS = { 1185 "CHECK", 1186 "EXCLUDE", 1187 "FOREIGN KEY", 1188 "LIKE", 1189 "PERIOD", 1190 "PRIMARY KEY", 1191 "UNIQUE", 1192 "WATERMARK", 1193 "BUCKET", 1194 "TRUNCATE", 1195 } 1196 1197 NO_PAREN_FUNCTION_PARSERS = { 1198 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1199 "CASE": lambda self: self._parse_case(), 1200 "CONNECT_BY_ROOT": lambda self: self.expression( 1201 exp.ConnectByRoot, this=self._parse_column() 1202 ), 1203 "IF": lambda self: self._parse_if(), 1204 } 1205 1206 INVALID_FUNC_NAME_TOKENS = { 1207 TokenType.IDENTIFIER, 1208 TokenType.STRING, 1209 } 1210 1211 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1212 1213 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1214 1215 FUNCTION_PARSERS = { 1216 **{ 1217 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1218 }, 1219 **{ 1220 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1221 }, 1222 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1223 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1224 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1225 "DECODE": lambda self: self._parse_decode(), 1226 "EXTRACT": lambda self: self._parse_extract(), 1227 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1228 "GAP_FILL": lambda self: self._parse_gap_fill(), 1229 "JSON_OBJECT": lambda self: self._parse_json_object(), 1230 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1231 "JSON_TABLE": lambda self: self._parse_json_table(), 1232 "MATCH": lambda self: self._parse_match_against(), 1233 "NORMALIZE": lambda self: self._parse_normalize(), 1234 "OPENJSON": lambda self: self._parse_open_json(), 1235 "OVERLAY": lambda self: self._parse_overlay(), 1236 "POSITION": lambda self: self._parse_position(), 1237 "PREDICT": lambda self: self._parse_predict(), 1238 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1239 "STRING_AGG": lambda self: self._parse_string_agg(), 1240 "SUBSTRING": lambda self: self._parse_substring(), 1241 "TRIM": lambda self: self._parse_trim(), 1242 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1243 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1244 "XMLELEMENT": lambda self: self.expression( 1245 exp.XMLElement, 1246 this=self._match_text_seq("NAME") and self._parse_id_var(), 1247 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1248 ), 1249 "XMLTABLE": lambda self: self._parse_xml_table(), 1250 } 1251 1252 QUERY_MODIFIER_PARSERS = { 1253 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1254 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1255 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1256 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1257 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1258 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1259 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1260 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1261 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1262 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1263 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1264 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1265 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1266 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1267 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1268 TokenType.CLUSTER_BY: lambda self: ( 1269 "cluster", 1270 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1271 ), 1272 TokenType.DISTRIBUTE_BY: lambda self: ( 1273 "distribute", 1274 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1275 ), 1276 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1277 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1278 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1279 } 1280 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1281 1282 SET_PARSERS = { 1283 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1284 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1285 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1286 "TRANSACTION": lambda self: self._parse_set_transaction(), 1287 } 1288 1289 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1290 1291 TYPE_LITERAL_PARSERS = { 1292 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1293 } 1294 1295 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1296 1297 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1298 1299 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1300 1301 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1302 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1303 "ISOLATION": ( 1304 ("LEVEL", "REPEATABLE", "READ"), 1305 ("LEVEL", "READ", "COMMITTED"), 1306 ("LEVEL", "READ", "UNCOMITTED"), 1307 ("LEVEL", "SERIALIZABLE"), 1308 ), 1309 "READ": ("WRITE", "ONLY"), 1310 } 1311 1312 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1313 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1314 ) 1315 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1316 1317 CREATE_SEQUENCE: OPTIONS_TYPE = { 1318 "SCALE": ("EXTEND", "NOEXTEND"), 1319 "SHARD": ("EXTEND", "NOEXTEND"), 1320 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1321 **dict.fromkeys( 1322 ( 1323 "SESSION", 1324 "GLOBAL", 1325 "KEEP", 1326 "NOKEEP", 1327 "ORDER", 1328 "NOORDER", 1329 "NOCACHE", 1330 "CYCLE", 1331 "NOCYCLE", 1332 "NOMINVALUE", 1333 "NOMAXVALUE", 1334 "NOSCALE", 1335 "NOSHARD", 1336 ), 1337 tuple(), 1338 ), 1339 } 1340 1341 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1342 1343 USABLES: OPTIONS_TYPE = dict.fromkeys( 1344 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1345 ) 1346 1347 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1348 1349 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1350 "TYPE": ("EVOLUTION",), 1351 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1352 } 1353 1354 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1355 1356 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1357 1358 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1359 "NOT": ("ENFORCED",), 1360 "MATCH": ( 1361 "FULL", 1362 "PARTIAL", 1363 "SIMPLE", 1364 ), 1365 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1366 "USING": ( 1367 "BTREE", 1368 "HASH", 1369 ), 1370 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1371 } 1372 1373 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1374 "NO": ("OTHERS",), 1375 "CURRENT": ("ROW",), 1376 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1377 } 1378 1379 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1380 1381 CLONE_KEYWORDS = {"CLONE", "COPY"} 1382 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1383 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1384 1385 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1386 1387 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1388 1389 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1390 1391 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1392 1393 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1394 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1395 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1396 1397 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1398 1399 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1400 1401 ADD_CONSTRAINT_TOKENS = { 1402 TokenType.CONSTRAINT, 1403 TokenType.FOREIGN_KEY, 1404 TokenType.INDEX, 1405 TokenType.KEY, 1406 TokenType.PRIMARY_KEY, 1407 TokenType.UNIQUE, 1408 } 1409 1410 DISTINCT_TOKENS = {TokenType.DISTINCT} 1411 1412 NULL_TOKENS = {TokenType.NULL} 1413 1414 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1415 1416 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1417 1418 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1419 1420 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1421 1422 ODBC_DATETIME_LITERALS = { 1423 "d": exp.Date, 1424 "t": exp.Time, 1425 "ts": exp.Timestamp, 1426 } 1427 1428 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1429 1430 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1431 1432 # The style options for the DESCRIBE statement 1433 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1434 1435 # The style options for the ANALYZE statement 1436 ANALYZE_STYLES = { 1437 "BUFFER_USAGE_LIMIT", 1438 "FULL", 1439 "LOCAL", 1440 "NO_WRITE_TO_BINLOG", 1441 "SAMPLE", 1442 "SKIP_LOCKED", 1443 "VERBOSE", 1444 } 1445 1446 ANALYZE_EXPRESSION_PARSERS = { 1447 "ALL": lambda self: self._parse_analyze_columns(), 1448 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1449 "DELETE": lambda self: self._parse_analyze_delete(), 1450 "DROP": lambda self: self._parse_analyze_histogram(), 1451 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1452 "LIST": lambda self: self._parse_analyze_list(), 1453 "PREDICATE": lambda self: self._parse_analyze_columns(), 1454 "UPDATE": lambda self: self._parse_analyze_histogram(), 1455 "VALIDATE": lambda self: self._parse_analyze_validate(), 1456 } 1457 1458 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1459 1460 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1461 1462 OPERATION_MODIFIERS: t.Set[str] = set() 1463 1464 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1465 1466 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1467 1468 STRICT_CAST = True 1469 1470 PREFIXED_PIVOT_COLUMNS = False 1471 IDENTIFY_PIVOT_STRINGS = False 1472 1473 LOG_DEFAULTS_TO_LN = False 1474 1475 # Whether the table sample clause expects CSV syntax 1476 TABLESAMPLE_CSV = False 1477 1478 # The default method used for table sampling 1479 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1480 1481 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1482 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1483 1484 # Whether the TRIM function expects the characters to trim as its first argument 1485 TRIM_PATTERN_FIRST = False 1486 1487 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1488 STRING_ALIASES = False 1489 1490 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1491 MODIFIERS_ATTACHED_TO_SET_OP = True 1492 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1493 1494 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1495 NO_PAREN_IF_COMMANDS = True 1496 1497 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1498 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1499 1500 # Whether the `:` operator is used to extract a value from a VARIANT column 1501 COLON_IS_VARIANT_EXTRACT = False 1502 1503 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1504 # If this is True and '(' is not found, the keyword will be treated as an identifier 1505 VALUES_FOLLOWED_BY_PAREN = True 1506 1507 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1508 SUPPORTS_IMPLICIT_UNNEST = False 1509 1510 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1511 INTERVAL_SPANS = True 1512 1513 # Whether a PARTITION clause can follow a table reference 1514 SUPPORTS_PARTITION_SELECTION = False 1515 1516 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1517 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1518 1519 # Whether the 'AS' keyword is optional in the CTE definition syntax 1520 OPTIONAL_ALIAS_TOKEN_CTE = True 1521 1522 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1523 ALTER_RENAME_REQUIRES_COLUMN = True 1524 1525 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1526 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1527 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1528 # as BigQuery, where all joins have the same precedence. 1529 JOINS_HAVE_EQUAL_PRECEDENCE = False 1530 1531 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1532 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1533 1534 # Whether map literals support arbitrary expressions as keys. 1535 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1536 # When False, keys are typically restricted to identifiers. 1537 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1538 1539 __slots__ = ( 1540 "error_level", 1541 "error_message_context", 1542 "max_errors", 1543 "dialect", 1544 "sql", 1545 "errors", 1546 "_tokens", 1547 "_index", 1548 "_curr", 1549 "_next", 1550 "_prev", 1551 "_prev_comments", 1552 "_pipe_cte_counter", 1553 ) 1554 1555 # Autofilled 1556 SHOW_TRIE: t.Dict = {} 1557 SET_TRIE: t.Dict = {} 1558 1559 def __init__( 1560 self, 1561 error_level: t.Optional[ErrorLevel] = None, 1562 error_message_context: int = 100, 1563 max_errors: int = 3, 1564 dialect: DialectType = None, 1565 ): 1566 from sqlglot.dialects import Dialect 1567 1568 self.error_level = error_level or ErrorLevel.IMMEDIATE 1569 self.error_message_context = error_message_context 1570 self.max_errors = max_errors 1571 self.dialect = Dialect.get_or_raise(dialect) 1572 self.reset() 1573 1574 def reset(self): 1575 self.sql = "" 1576 self.errors = [] 1577 self._tokens = [] 1578 self._index = 0 1579 self._curr = None 1580 self._next = None 1581 self._prev = None 1582 self._prev_comments = None 1583 self._pipe_cte_counter = 0 1584 1585 def parse( 1586 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1587 ) -> t.List[t.Optional[exp.Expression]]: 1588 """ 1589 Parses a list of tokens and returns a list of syntax trees, one tree 1590 per parsed SQL statement. 1591 1592 Args: 1593 raw_tokens: The list of tokens. 1594 sql: The original SQL string, used to produce helpful debug messages. 1595 1596 Returns: 1597 The list of the produced syntax trees. 1598 """ 1599 return self._parse( 1600 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1601 ) 1602 1603 def parse_into( 1604 self, 1605 expression_types: exp.IntoType, 1606 raw_tokens: t.List[Token], 1607 sql: t.Optional[str] = None, 1608 ) -> t.List[t.Optional[exp.Expression]]: 1609 """ 1610 Parses a list of tokens into a given Expression type. If a collection of Expression 1611 types is given instead, this method will try to parse the token list into each one 1612 of them, stopping at the first for which the parsing succeeds. 1613 1614 Args: 1615 expression_types: The expression type(s) to try and parse the token list into. 1616 raw_tokens: The list of tokens. 1617 sql: The original SQL string, used to produce helpful debug messages. 1618 1619 Returns: 1620 The target Expression. 1621 """ 1622 errors = [] 1623 for expression_type in ensure_list(expression_types): 1624 parser = self.EXPRESSION_PARSERS.get(expression_type) 1625 if not parser: 1626 raise TypeError(f"No parser registered for {expression_type}") 1627 1628 try: 1629 return self._parse(parser, raw_tokens, sql) 1630 except ParseError as e: 1631 e.errors[0]["into_expression"] = expression_type 1632 errors.append(e) 1633 1634 raise ParseError( 1635 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1636 errors=merge_errors(errors), 1637 ) from errors[-1] 1638 1639 def _parse( 1640 self, 1641 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1642 raw_tokens: t.List[Token], 1643 sql: t.Optional[str] = None, 1644 ) -> t.List[t.Optional[exp.Expression]]: 1645 self.reset() 1646 self.sql = sql or "" 1647 1648 total = len(raw_tokens) 1649 chunks: t.List[t.List[Token]] = [[]] 1650 1651 for i, token in enumerate(raw_tokens): 1652 if token.token_type == TokenType.SEMICOLON: 1653 if token.comments: 1654 chunks.append([token]) 1655 1656 if i < total - 1: 1657 chunks.append([]) 1658 else: 1659 chunks[-1].append(token) 1660 1661 expressions = [] 1662 1663 for tokens in chunks: 1664 self._index = -1 1665 self._tokens = tokens 1666 self._advance() 1667 1668 expressions.append(parse_method(self)) 1669 1670 if self._index < len(self._tokens): 1671 self.raise_error("Invalid expression / Unexpected token") 1672 1673 self.check_errors() 1674 1675 return expressions 1676 1677 def check_errors(self) -> None: 1678 """Logs or raises any found errors, depending on the chosen error level setting.""" 1679 if self.error_level == ErrorLevel.WARN: 1680 for error in self.errors: 1681 logger.error(str(error)) 1682 elif self.error_level == ErrorLevel.RAISE and self.errors: 1683 raise ParseError( 1684 concat_messages(self.errors, self.max_errors), 1685 errors=merge_errors(self.errors), 1686 ) 1687 1688 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1689 """ 1690 Appends an error in the list of recorded errors or raises it, depending on the chosen 1691 error level setting. 1692 """ 1693 token = token or self._curr or self._prev or Token.string("") 1694 start = token.start 1695 end = token.end + 1 1696 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1697 highlight = self.sql[start:end] 1698 end_context = self.sql[end : end + self.error_message_context] 1699 1700 error = ParseError.new( 1701 f"{message}. Line {token.line}, Col: {token.col}.\n" 1702 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1703 description=message, 1704 line=token.line, 1705 col=token.col, 1706 start_context=start_context, 1707 highlight=highlight, 1708 end_context=end_context, 1709 ) 1710 1711 if self.error_level == ErrorLevel.IMMEDIATE: 1712 raise error 1713 1714 self.errors.append(error) 1715 1716 def expression( 1717 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1718 ) -> E: 1719 """ 1720 Creates a new, validated Expression. 1721 1722 Args: 1723 exp_class: The expression class to instantiate. 1724 comments: An optional list of comments to attach to the expression. 1725 kwargs: The arguments to set for the expression along with their respective values. 1726 1727 Returns: 1728 The target expression. 1729 """ 1730 instance = exp_class(**kwargs) 1731 instance.add_comments(comments) if comments else self._add_comments(instance) 1732 return self.validate_expression(instance) 1733 1734 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1735 if expression and self._prev_comments: 1736 expression.add_comments(self._prev_comments) 1737 self._prev_comments = None 1738 1739 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1740 """ 1741 Validates an Expression, making sure that all its mandatory arguments are set. 1742 1743 Args: 1744 expression: The expression to validate. 1745 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1746 1747 Returns: 1748 The validated expression. 1749 """ 1750 if self.error_level != ErrorLevel.IGNORE: 1751 for error_message in expression.error_messages(args): 1752 self.raise_error(error_message) 1753 1754 return expression 1755 1756 def _find_sql(self, start: Token, end: Token) -> str: 1757 return self.sql[start.start : end.end + 1] 1758 1759 def _is_connected(self) -> bool: 1760 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1761 1762 def _advance(self, times: int = 1) -> None: 1763 self._index += times 1764 self._curr = seq_get(self._tokens, self._index) 1765 self._next = seq_get(self._tokens, self._index + 1) 1766 1767 if self._index > 0: 1768 self._prev = self._tokens[self._index - 1] 1769 self._prev_comments = self._prev.comments 1770 else: 1771 self._prev = None 1772 self._prev_comments = None 1773 1774 def _retreat(self, index: int) -> None: 1775 if index != self._index: 1776 self._advance(index - self._index) 1777 1778 def _warn_unsupported(self) -> None: 1779 if len(self._tokens) <= 1: 1780 return 1781 1782 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1783 # interested in emitting a warning for the one being currently processed. 1784 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1785 1786 logger.warning( 1787 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1788 ) 1789 1790 def _parse_command(self) -> exp.Command: 1791 self._warn_unsupported() 1792 return self.expression( 1793 exp.Command, 1794 comments=self._prev_comments, 1795 this=self._prev.text.upper(), 1796 expression=self._parse_string(), 1797 ) 1798 1799 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1800 """ 1801 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1802 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1803 solve this by setting & resetting the parser state accordingly 1804 """ 1805 index = self._index 1806 error_level = self.error_level 1807 1808 self.error_level = ErrorLevel.IMMEDIATE 1809 try: 1810 this = parse_method() 1811 except ParseError: 1812 this = None 1813 finally: 1814 if not this or retreat: 1815 self._retreat(index) 1816 self.error_level = error_level 1817 1818 return this 1819 1820 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1821 start = self._prev 1822 exists = self._parse_exists() if allow_exists else None 1823 1824 self._match(TokenType.ON) 1825 1826 materialized = self._match_text_seq("MATERIALIZED") 1827 kind = self._match_set(self.CREATABLES) and self._prev 1828 if not kind: 1829 return self._parse_as_command(start) 1830 1831 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1832 this = self._parse_user_defined_function(kind=kind.token_type) 1833 elif kind.token_type == TokenType.TABLE: 1834 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1835 elif kind.token_type == TokenType.COLUMN: 1836 this = self._parse_column() 1837 else: 1838 this = self._parse_id_var() 1839 1840 self._match(TokenType.IS) 1841 1842 return self.expression( 1843 exp.Comment, 1844 this=this, 1845 kind=kind.text, 1846 expression=self._parse_string(), 1847 exists=exists, 1848 materialized=materialized, 1849 ) 1850 1851 def _parse_to_table( 1852 self, 1853 ) -> exp.ToTableProperty: 1854 table = self._parse_table_parts(schema=True) 1855 return self.expression(exp.ToTableProperty, this=table) 1856 1857 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1858 def _parse_ttl(self) -> exp.Expression: 1859 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1860 this = self._parse_bitwise() 1861 1862 if self._match_text_seq("DELETE"): 1863 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1864 if self._match_text_seq("RECOMPRESS"): 1865 return self.expression( 1866 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1867 ) 1868 if self._match_text_seq("TO", "DISK"): 1869 return self.expression( 1870 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1871 ) 1872 if self._match_text_seq("TO", "VOLUME"): 1873 return self.expression( 1874 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1875 ) 1876 1877 return this 1878 1879 expressions = self._parse_csv(_parse_ttl_action) 1880 where = self._parse_where() 1881 group = self._parse_group() 1882 1883 aggregates = None 1884 if group and self._match(TokenType.SET): 1885 aggregates = self._parse_csv(self._parse_set_item) 1886 1887 return self.expression( 1888 exp.MergeTreeTTL, 1889 expressions=expressions, 1890 where=where, 1891 group=group, 1892 aggregates=aggregates, 1893 ) 1894 1895 def _parse_statement(self) -> t.Optional[exp.Expression]: 1896 if self._curr is None: 1897 return None 1898 1899 if self._match_set(self.STATEMENT_PARSERS): 1900 comments = self._prev_comments 1901 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1902 stmt.add_comments(comments, prepend=True) 1903 return stmt 1904 1905 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1906 return self._parse_command() 1907 1908 expression = self._parse_expression() 1909 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1910 return self._parse_query_modifiers(expression) 1911 1912 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1913 start = self._prev 1914 temporary = self._match(TokenType.TEMPORARY) 1915 materialized = self._match_text_seq("MATERIALIZED") 1916 1917 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1918 if not kind: 1919 return self._parse_as_command(start) 1920 1921 concurrently = self._match_text_seq("CONCURRENTLY") 1922 if_exists = exists or self._parse_exists() 1923 1924 if kind == "COLUMN": 1925 this = self._parse_column() 1926 else: 1927 this = self._parse_table_parts( 1928 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1929 ) 1930 1931 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1932 1933 if self._match(TokenType.L_PAREN, advance=False): 1934 expressions = self._parse_wrapped_csv(self._parse_types) 1935 else: 1936 expressions = None 1937 1938 return self.expression( 1939 exp.Drop, 1940 exists=if_exists, 1941 this=this, 1942 expressions=expressions, 1943 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1944 temporary=temporary, 1945 materialized=materialized, 1946 cascade=self._match_text_seq("CASCADE"), 1947 constraints=self._match_text_seq("CONSTRAINTS"), 1948 purge=self._match_text_seq("PURGE"), 1949 cluster=cluster, 1950 concurrently=concurrently, 1951 ) 1952 1953 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1954 return ( 1955 self._match_text_seq("IF") 1956 and (not not_ or self._match(TokenType.NOT)) 1957 and self._match(TokenType.EXISTS) 1958 ) 1959 1960 def _parse_create(self) -> exp.Create | exp.Command: 1961 # Note: this can't be None because we've matched a statement parser 1962 start = self._prev 1963 1964 replace = ( 1965 start.token_type == TokenType.REPLACE 1966 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1967 or self._match_pair(TokenType.OR, TokenType.ALTER) 1968 ) 1969 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1970 1971 unique = self._match(TokenType.UNIQUE) 1972 1973 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1974 clustered = True 1975 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1976 "COLUMNSTORE" 1977 ): 1978 clustered = False 1979 else: 1980 clustered = None 1981 1982 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1983 self._advance() 1984 1985 properties = None 1986 create_token = self._match_set(self.CREATABLES) and self._prev 1987 1988 if not create_token: 1989 # exp.Properties.Location.POST_CREATE 1990 properties = self._parse_properties() 1991 create_token = self._match_set(self.CREATABLES) and self._prev 1992 1993 if not properties or not create_token: 1994 return self._parse_as_command(start) 1995 1996 concurrently = self._match_text_seq("CONCURRENTLY") 1997 exists = self._parse_exists(not_=True) 1998 this = None 1999 expression: t.Optional[exp.Expression] = None 2000 indexes = None 2001 no_schema_binding = None 2002 begin = None 2003 end = None 2004 clone = None 2005 2006 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2007 nonlocal properties 2008 if properties and temp_props: 2009 properties.expressions.extend(temp_props.expressions) 2010 elif temp_props: 2011 properties = temp_props 2012 2013 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2014 this = self._parse_user_defined_function(kind=create_token.token_type) 2015 2016 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2017 extend_props(self._parse_properties()) 2018 2019 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2020 extend_props(self._parse_properties()) 2021 2022 if not expression: 2023 if self._match(TokenType.COMMAND): 2024 expression = self._parse_as_command(self._prev) 2025 else: 2026 begin = self._match(TokenType.BEGIN) 2027 return_ = self._match_text_seq("RETURN") 2028 2029 if self._match(TokenType.STRING, advance=False): 2030 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2031 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2032 expression = self._parse_string() 2033 extend_props(self._parse_properties()) 2034 else: 2035 expression = self._parse_user_defined_function_expression() 2036 2037 end = self._match_text_seq("END") 2038 2039 if return_: 2040 expression = self.expression(exp.Return, this=expression) 2041 elif create_token.token_type == TokenType.INDEX: 2042 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2043 if not self._match(TokenType.ON): 2044 index = self._parse_id_var() 2045 anonymous = False 2046 else: 2047 index = None 2048 anonymous = True 2049 2050 this = self._parse_index(index=index, anonymous=anonymous) 2051 elif create_token.token_type in self.DB_CREATABLES: 2052 table_parts = self._parse_table_parts( 2053 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2054 ) 2055 2056 # exp.Properties.Location.POST_NAME 2057 self._match(TokenType.COMMA) 2058 extend_props(self._parse_properties(before=True)) 2059 2060 this = self._parse_schema(this=table_parts) 2061 2062 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2063 extend_props(self._parse_properties()) 2064 2065 has_alias = self._match(TokenType.ALIAS) 2066 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2067 # exp.Properties.Location.POST_ALIAS 2068 extend_props(self._parse_properties()) 2069 2070 if create_token.token_type == TokenType.SEQUENCE: 2071 expression = self._parse_types() 2072 extend_props(self._parse_properties()) 2073 else: 2074 expression = self._parse_ddl_select() 2075 2076 # Some dialects also support using a table as an alias instead of a SELECT. 2077 # Here we fallback to this as an alternative. 2078 if not expression and has_alias: 2079 expression = self._try_parse(self._parse_table_parts) 2080 2081 if create_token.token_type == TokenType.TABLE: 2082 # exp.Properties.Location.POST_EXPRESSION 2083 extend_props(self._parse_properties()) 2084 2085 indexes = [] 2086 while True: 2087 index = self._parse_index() 2088 2089 # exp.Properties.Location.POST_INDEX 2090 extend_props(self._parse_properties()) 2091 if not index: 2092 break 2093 else: 2094 self._match(TokenType.COMMA) 2095 indexes.append(index) 2096 elif create_token.token_type == TokenType.VIEW: 2097 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2098 no_schema_binding = True 2099 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2100 extend_props(self._parse_properties()) 2101 2102 shallow = self._match_text_seq("SHALLOW") 2103 2104 if self._match_texts(self.CLONE_KEYWORDS): 2105 copy = self._prev.text.lower() == "copy" 2106 clone = self.expression( 2107 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2108 ) 2109 2110 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2111 return self._parse_as_command(start) 2112 2113 create_kind_text = create_token.text.upper() 2114 return self.expression( 2115 exp.Create, 2116 this=this, 2117 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2118 replace=replace, 2119 refresh=refresh, 2120 unique=unique, 2121 expression=expression, 2122 exists=exists, 2123 properties=properties, 2124 indexes=indexes, 2125 no_schema_binding=no_schema_binding, 2126 begin=begin, 2127 end=end, 2128 clone=clone, 2129 concurrently=concurrently, 2130 clustered=clustered, 2131 ) 2132 2133 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2134 seq = exp.SequenceProperties() 2135 2136 options = [] 2137 index = self._index 2138 2139 while self._curr: 2140 self._match(TokenType.COMMA) 2141 if self._match_text_seq("INCREMENT"): 2142 self._match_text_seq("BY") 2143 self._match_text_seq("=") 2144 seq.set("increment", self._parse_term()) 2145 elif self._match_text_seq("MINVALUE"): 2146 seq.set("minvalue", self._parse_term()) 2147 elif self._match_text_seq("MAXVALUE"): 2148 seq.set("maxvalue", self._parse_term()) 2149 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2150 self._match_text_seq("=") 2151 seq.set("start", self._parse_term()) 2152 elif self._match_text_seq("CACHE"): 2153 # T-SQL allows empty CACHE which is initialized dynamically 2154 seq.set("cache", self._parse_number() or True) 2155 elif self._match_text_seq("OWNED", "BY"): 2156 # "OWNED BY NONE" is the default 2157 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2158 else: 2159 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2160 if opt: 2161 options.append(opt) 2162 else: 2163 break 2164 2165 seq.set("options", options if options else None) 2166 return None if self._index == index else seq 2167 2168 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2169 # only used for teradata currently 2170 self._match(TokenType.COMMA) 2171 2172 kwargs = { 2173 "no": self._match_text_seq("NO"), 2174 "dual": self._match_text_seq("DUAL"), 2175 "before": self._match_text_seq("BEFORE"), 2176 "default": self._match_text_seq("DEFAULT"), 2177 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2178 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2179 "after": self._match_text_seq("AFTER"), 2180 "minimum": self._match_texts(("MIN", "MINIMUM")), 2181 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2182 } 2183 2184 if self._match_texts(self.PROPERTY_PARSERS): 2185 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2186 try: 2187 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2188 except TypeError: 2189 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2190 2191 return None 2192 2193 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2194 return self._parse_wrapped_csv(self._parse_property) 2195 2196 def _parse_property(self) -> t.Optional[exp.Expression]: 2197 if self._match_texts(self.PROPERTY_PARSERS): 2198 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2199 2200 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2201 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2202 2203 if self._match_text_seq("COMPOUND", "SORTKEY"): 2204 return self._parse_sortkey(compound=True) 2205 2206 if self._match_text_seq("SQL", "SECURITY"): 2207 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2208 2209 index = self._index 2210 key = self._parse_column() 2211 2212 if not self._match(TokenType.EQ): 2213 self._retreat(index) 2214 return self._parse_sequence_properties() 2215 2216 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2217 if isinstance(key, exp.Column): 2218 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2219 2220 value = self._parse_bitwise() or self._parse_var(any_token=True) 2221 2222 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2223 if isinstance(value, exp.Column): 2224 value = exp.var(value.name) 2225 2226 return self.expression(exp.Property, this=key, value=value) 2227 2228 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2229 if self._match_text_seq("BY"): 2230 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2231 2232 self._match(TokenType.ALIAS) 2233 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2234 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2235 2236 return self.expression( 2237 exp.FileFormatProperty, 2238 this=( 2239 self.expression( 2240 exp.InputOutputFormat, 2241 input_format=input_format, 2242 output_format=output_format, 2243 ) 2244 if input_format or output_format 2245 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2246 ), 2247 hive_format=True, 2248 ) 2249 2250 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2251 field = self._parse_field() 2252 if isinstance(field, exp.Identifier) and not field.quoted: 2253 field = exp.var(field) 2254 2255 return field 2256 2257 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2258 self._match(TokenType.EQ) 2259 self._match(TokenType.ALIAS) 2260 2261 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2262 2263 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2264 properties = [] 2265 while True: 2266 if before: 2267 prop = self._parse_property_before() 2268 else: 2269 prop = self._parse_property() 2270 if not prop: 2271 break 2272 for p in ensure_list(prop): 2273 properties.append(p) 2274 2275 if properties: 2276 return self.expression(exp.Properties, expressions=properties) 2277 2278 return None 2279 2280 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2281 return self.expression( 2282 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2283 ) 2284 2285 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2286 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2287 security_specifier = self._prev.text.upper() 2288 return self.expression(exp.SecurityProperty, this=security_specifier) 2289 return None 2290 2291 def _parse_settings_property(self) -> exp.SettingsProperty: 2292 return self.expression( 2293 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2294 ) 2295 2296 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2297 if self._index >= 2: 2298 pre_volatile_token = self._tokens[self._index - 2] 2299 else: 2300 pre_volatile_token = None 2301 2302 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2303 return exp.VolatileProperty() 2304 2305 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2306 2307 def _parse_retention_period(self) -> exp.Var: 2308 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2309 number = self._parse_number() 2310 number_str = f"{number} " if number else "" 2311 unit = self._parse_var(any_token=True) 2312 return exp.var(f"{number_str}{unit}") 2313 2314 def _parse_system_versioning_property( 2315 self, with_: bool = False 2316 ) -> exp.WithSystemVersioningProperty: 2317 self._match(TokenType.EQ) 2318 prop = self.expression( 2319 exp.WithSystemVersioningProperty, 2320 **{ # type: ignore 2321 "on": True, 2322 "with": with_, 2323 }, 2324 ) 2325 2326 if self._match_text_seq("OFF"): 2327 prop.set("on", False) 2328 return prop 2329 2330 self._match(TokenType.ON) 2331 if self._match(TokenType.L_PAREN): 2332 while self._curr and not self._match(TokenType.R_PAREN): 2333 if self._match_text_seq("HISTORY_TABLE", "="): 2334 prop.set("this", self._parse_table_parts()) 2335 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2336 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2337 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2338 prop.set("retention_period", self._parse_retention_period()) 2339 2340 self._match(TokenType.COMMA) 2341 2342 return prop 2343 2344 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2345 self._match(TokenType.EQ) 2346 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2347 prop = self.expression(exp.DataDeletionProperty, on=on) 2348 2349 if self._match(TokenType.L_PAREN): 2350 while self._curr and not self._match(TokenType.R_PAREN): 2351 if self._match_text_seq("FILTER_COLUMN", "="): 2352 prop.set("filter_column", self._parse_column()) 2353 elif self._match_text_seq("RETENTION_PERIOD", "="): 2354 prop.set("retention_period", self._parse_retention_period()) 2355 2356 self._match(TokenType.COMMA) 2357 2358 return prop 2359 2360 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2361 kind = "HASH" 2362 expressions: t.Optional[t.List[exp.Expression]] = None 2363 if self._match_text_seq("BY", "HASH"): 2364 expressions = self._parse_wrapped_csv(self._parse_id_var) 2365 elif self._match_text_seq("BY", "RANDOM"): 2366 kind = "RANDOM" 2367 2368 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2369 buckets: t.Optional[exp.Expression] = None 2370 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2371 buckets = self._parse_number() 2372 2373 return self.expression( 2374 exp.DistributedByProperty, 2375 expressions=expressions, 2376 kind=kind, 2377 buckets=buckets, 2378 order=self._parse_order(), 2379 ) 2380 2381 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2382 self._match_text_seq("KEY") 2383 expressions = self._parse_wrapped_id_vars() 2384 return self.expression(expr_type, expressions=expressions) 2385 2386 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2387 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2388 prop = self._parse_system_versioning_property(with_=True) 2389 self._match_r_paren() 2390 return prop 2391 2392 if self._match(TokenType.L_PAREN, advance=False): 2393 return self._parse_wrapped_properties() 2394 2395 if self._match_text_seq("JOURNAL"): 2396 return self._parse_withjournaltable() 2397 2398 if self._match_texts(self.VIEW_ATTRIBUTES): 2399 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2400 2401 if self._match_text_seq("DATA"): 2402 return self._parse_withdata(no=False) 2403 elif self._match_text_seq("NO", "DATA"): 2404 return self._parse_withdata(no=True) 2405 2406 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2407 return self._parse_serde_properties(with_=True) 2408 2409 if self._match(TokenType.SCHEMA): 2410 return self.expression( 2411 exp.WithSchemaBindingProperty, 2412 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2413 ) 2414 2415 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2416 return self.expression( 2417 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2418 ) 2419 2420 if not self._next: 2421 return None 2422 2423 return self._parse_withisolatedloading() 2424 2425 def _parse_procedure_option(self) -> exp.Expression | None: 2426 if self._match_text_seq("EXECUTE", "AS"): 2427 return self.expression( 2428 exp.ExecuteAsProperty, 2429 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2430 or self._parse_string(), 2431 ) 2432 2433 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2434 2435 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2436 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2437 self._match(TokenType.EQ) 2438 2439 user = self._parse_id_var() 2440 self._match(TokenType.PARAMETER) 2441 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2442 2443 if not user or not host: 2444 return None 2445 2446 return exp.DefinerProperty(this=f"{user}@{host}") 2447 2448 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2449 self._match(TokenType.TABLE) 2450 self._match(TokenType.EQ) 2451 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2452 2453 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2454 return self.expression(exp.LogProperty, no=no) 2455 2456 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2457 return self.expression(exp.JournalProperty, **kwargs) 2458 2459 def _parse_checksum(self) -> exp.ChecksumProperty: 2460 self._match(TokenType.EQ) 2461 2462 on = None 2463 if self._match(TokenType.ON): 2464 on = True 2465 elif self._match_text_seq("OFF"): 2466 on = False 2467 2468 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2469 2470 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2471 return self.expression( 2472 exp.Cluster, 2473 expressions=( 2474 self._parse_wrapped_csv(self._parse_ordered) 2475 if wrapped 2476 else self._parse_csv(self._parse_ordered) 2477 ), 2478 ) 2479 2480 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2481 self._match_text_seq("BY") 2482 2483 self._match_l_paren() 2484 expressions = self._parse_csv(self._parse_column) 2485 self._match_r_paren() 2486 2487 if self._match_text_seq("SORTED", "BY"): 2488 self._match_l_paren() 2489 sorted_by = self._parse_csv(self._parse_ordered) 2490 self._match_r_paren() 2491 else: 2492 sorted_by = None 2493 2494 self._match(TokenType.INTO) 2495 buckets = self._parse_number() 2496 self._match_text_seq("BUCKETS") 2497 2498 return self.expression( 2499 exp.ClusteredByProperty, 2500 expressions=expressions, 2501 sorted_by=sorted_by, 2502 buckets=buckets, 2503 ) 2504 2505 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2506 if not self._match_text_seq("GRANTS"): 2507 self._retreat(self._index - 1) 2508 return None 2509 2510 return self.expression(exp.CopyGrantsProperty) 2511 2512 def _parse_freespace(self) -> exp.FreespaceProperty: 2513 self._match(TokenType.EQ) 2514 return self.expression( 2515 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2516 ) 2517 2518 def _parse_mergeblockratio( 2519 self, no: bool = False, default: bool = False 2520 ) -> exp.MergeBlockRatioProperty: 2521 if self._match(TokenType.EQ): 2522 return self.expression( 2523 exp.MergeBlockRatioProperty, 2524 this=self._parse_number(), 2525 percent=self._match(TokenType.PERCENT), 2526 ) 2527 2528 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2529 2530 def _parse_datablocksize( 2531 self, 2532 default: t.Optional[bool] = None, 2533 minimum: t.Optional[bool] = None, 2534 maximum: t.Optional[bool] = None, 2535 ) -> exp.DataBlocksizeProperty: 2536 self._match(TokenType.EQ) 2537 size = self._parse_number() 2538 2539 units = None 2540 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2541 units = self._prev.text 2542 2543 return self.expression( 2544 exp.DataBlocksizeProperty, 2545 size=size, 2546 units=units, 2547 default=default, 2548 minimum=minimum, 2549 maximum=maximum, 2550 ) 2551 2552 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2553 self._match(TokenType.EQ) 2554 always = self._match_text_seq("ALWAYS") 2555 manual = self._match_text_seq("MANUAL") 2556 never = self._match_text_seq("NEVER") 2557 default = self._match_text_seq("DEFAULT") 2558 2559 autotemp = None 2560 if self._match_text_seq("AUTOTEMP"): 2561 autotemp = self._parse_schema() 2562 2563 return self.expression( 2564 exp.BlockCompressionProperty, 2565 always=always, 2566 manual=manual, 2567 never=never, 2568 default=default, 2569 autotemp=autotemp, 2570 ) 2571 2572 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2573 index = self._index 2574 no = self._match_text_seq("NO") 2575 concurrent = self._match_text_seq("CONCURRENT") 2576 2577 if not self._match_text_seq("ISOLATED", "LOADING"): 2578 self._retreat(index) 2579 return None 2580 2581 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2582 return self.expression( 2583 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2584 ) 2585 2586 def _parse_locking(self) -> exp.LockingProperty: 2587 if self._match(TokenType.TABLE): 2588 kind = "TABLE" 2589 elif self._match(TokenType.VIEW): 2590 kind = "VIEW" 2591 elif self._match(TokenType.ROW): 2592 kind = "ROW" 2593 elif self._match_text_seq("DATABASE"): 2594 kind = "DATABASE" 2595 else: 2596 kind = None 2597 2598 if kind in ("DATABASE", "TABLE", "VIEW"): 2599 this = self._parse_table_parts() 2600 else: 2601 this = None 2602 2603 if self._match(TokenType.FOR): 2604 for_or_in = "FOR" 2605 elif self._match(TokenType.IN): 2606 for_or_in = "IN" 2607 else: 2608 for_or_in = None 2609 2610 if self._match_text_seq("ACCESS"): 2611 lock_type = "ACCESS" 2612 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2613 lock_type = "EXCLUSIVE" 2614 elif self._match_text_seq("SHARE"): 2615 lock_type = "SHARE" 2616 elif self._match_text_seq("READ"): 2617 lock_type = "READ" 2618 elif self._match_text_seq("WRITE"): 2619 lock_type = "WRITE" 2620 elif self._match_text_seq("CHECKSUM"): 2621 lock_type = "CHECKSUM" 2622 else: 2623 lock_type = None 2624 2625 override = self._match_text_seq("OVERRIDE") 2626 2627 return self.expression( 2628 exp.LockingProperty, 2629 this=this, 2630 kind=kind, 2631 for_or_in=for_or_in, 2632 lock_type=lock_type, 2633 override=override, 2634 ) 2635 2636 def _parse_partition_by(self) -> t.List[exp.Expression]: 2637 if self._match(TokenType.PARTITION_BY): 2638 return self._parse_csv(self._parse_assignment) 2639 return [] 2640 2641 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2642 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2643 if self._match_text_seq("MINVALUE"): 2644 return exp.var("MINVALUE") 2645 if self._match_text_seq("MAXVALUE"): 2646 return exp.var("MAXVALUE") 2647 return self._parse_bitwise() 2648 2649 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2650 expression = None 2651 from_expressions = None 2652 to_expressions = None 2653 2654 if self._match(TokenType.IN): 2655 this = self._parse_wrapped_csv(self._parse_bitwise) 2656 elif self._match(TokenType.FROM): 2657 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2658 self._match_text_seq("TO") 2659 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2660 elif self._match_text_seq("WITH", "(", "MODULUS"): 2661 this = self._parse_number() 2662 self._match_text_seq(",", "REMAINDER") 2663 expression = self._parse_number() 2664 self._match_r_paren() 2665 else: 2666 self.raise_error("Failed to parse partition bound spec.") 2667 2668 return self.expression( 2669 exp.PartitionBoundSpec, 2670 this=this, 2671 expression=expression, 2672 from_expressions=from_expressions, 2673 to_expressions=to_expressions, 2674 ) 2675 2676 # https://www.postgresql.org/docs/current/sql-createtable.html 2677 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2678 if not self._match_text_seq("OF"): 2679 self._retreat(self._index - 1) 2680 return None 2681 2682 this = self._parse_table(schema=True) 2683 2684 if self._match(TokenType.DEFAULT): 2685 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2686 elif self._match_text_seq("FOR", "VALUES"): 2687 expression = self._parse_partition_bound_spec() 2688 else: 2689 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2690 2691 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2692 2693 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2694 self._match(TokenType.EQ) 2695 return self.expression( 2696 exp.PartitionedByProperty, 2697 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2698 ) 2699 2700 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2701 if self._match_text_seq("AND", "STATISTICS"): 2702 statistics = True 2703 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2704 statistics = False 2705 else: 2706 statistics = None 2707 2708 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2709 2710 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2711 if self._match_text_seq("SQL"): 2712 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2713 return None 2714 2715 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2716 if self._match_text_seq("SQL", "DATA"): 2717 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2718 return None 2719 2720 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2721 if self._match_text_seq("PRIMARY", "INDEX"): 2722 return exp.NoPrimaryIndexProperty() 2723 if self._match_text_seq("SQL"): 2724 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2725 return None 2726 2727 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2728 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2729 return exp.OnCommitProperty() 2730 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2731 return exp.OnCommitProperty(delete=True) 2732 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2733 2734 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2735 if self._match_text_seq("SQL", "DATA"): 2736 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2737 return None 2738 2739 def _parse_distkey(self) -> exp.DistKeyProperty: 2740 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2741 2742 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2743 table = self._parse_table(schema=True) 2744 2745 options = [] 2746 while self._match_texts(("INCLUDING", "EXCLUDING")): 2747 this = self._prev.text.upper() 2748 2749 id_var = self._parse_id_var() 2750 if not id_var: 2751 return None 2752 2753 options.append( 2754 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2755 ) 2756 2757 return self.expression(exp.LikeProperty, this=table, expressions=options) 2758 2759 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2760 return self.expression( 2761 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2762 ) 2763 2764 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2765 self._match(TokenType.EQ) 2766 return self.expression( 2767 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2768 ) 2769 2770 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2771 self._match_text_seq("WITH", "CONNECTION") 2772 return self.expression( 2773 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2774 ) 2775 2776 def _parse_returns(self) -> exp.ReturnsProperty: 2777 value: t.Optional[exp.Expression] 2778 null = None 2779 is_table = self._match(TokenType.TABLE) 2780 2781 if is_table: 2782 if self._match(TokenType.LT): 2783 value = self.expression( 2784 exp.Schema, 2785 this="TABLE", 2786 expressions=self._parse_csv(self._parse_struct_types), 2787 ) 2788 if not self._match(TokenType.GT): 2789 self.raise_error("Expecting >") 2790 else: 2791 value = self._parse_schema(exp.var("TABLE")) 2792 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2793 null = True 2794 value = None 2795 else: 2796 value = self._parse_types() 2797 2798 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2799 2800 def _parse_describe(self) -> exp.Describe: 2801 kind = self._match_set(self.CREATABLES) and self._prev.text 2802 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2803 if self._match(TokenType.DOT): 2804 style = None 2805 self._retreat(self._index - 2) 2806 2807 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2808 2809 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2810 this = self._parse_statement() 2811 else: 2812 this = self._parse_table(schema=True) 2813 2814 properties = self._parse_properties() 2815 expressions = properties.expressions if properties else None 2816 partition = self._parse_partition() 2817 return self.expression( 2818 exp.Describe, 2819 this=this, 2820 style=style, 2821 kind=kind, 2822 expressions=expressions, 2823 partition=partition, 2824 format=format, 2825 ) 2826 2827 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2828 kind = self._prev.text.upper() 2829 expressions = [] 2830 2831 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2832 if self._match(TokenType.WHEN): 2833 expression = self._parse_disjunction() 2834 self._match(TokenType.THEN) 2835 else: 2836 expression = None 2837 2838 else_ = self._match(TokenType.ELSE) 2839 2840 if not self._match(TokenType.INTO): 2841 return None 2842 2843 return self.expression( 2844 exp.ConditionalInsert, 2845 this=self.expression( 2846 exp.Insert, 2847 this=self._parse_table(schema=True), 2848 expression=self._parse_derived_table_values(), 2849 ), 2850 expression=expression, 2851 else_=else_, 2852 ) 2853 2854 expression = parse_conditional_insert() 2855 while expression is not None: 2856 expressions.append(expression) 2857 expression = parse_conditional_insert() 2858 2859 return self.expression( 2860 exp.MultitableInserts, 2861 kind=kind, 2862 comments=comments, 2863 expressions=expressions, 2864 source=self._parse_table(), 2865 ) 2866 2867 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2868 comments = [] 2869 hint = self._parse_hint() 2870 overwrite = self._match(TokenType.OVERWRITE) 2871 ignore = self._match(TokenType.IGNORE) 2872 local = self._match_text_seq("LOCAL") 2873 alternative = None 2874 is_function = None 2875 2876 if self._match_text_seq("DIRECTORY"): 2877 this: t.Optional[exp.Expression] = self.expression( 2878 exp.Directory, 2879 this=self._parse_var_or_string(), 2880 local=local, 2881 row_format=self._parse_row_format(match_row=True), 2882 ) 2883 else: 2884 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2885 comments += ensure_list(self._prev_comments) 2886 return self._parse_multitable_inserts(comments) 2887 2888 if self._match(TokenType.OR): 2889 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2890 2891 self._match(TokenType.INTO) 2892 comments += ensure_list(self._prev_comments) 2893 self._match(TokenType.TABLE) 2894 is_function = self._match(TokenType.FUNCTION) 2895 2896 this = ( 2897 self._parse_table(schema=True, parse_partition=True) 2898 if not is_function 2899 else self._parse_function() 2900 ) 2901 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2902 this.set("alias", self._parse_table_alias()) 2903 2904 returning = self._parse_returning() 2905 2906 return self.expression( 2907 exp.Insert, 2908 comments=comments, 2909 hint=hint, 2910 is_function=is_function, 2911 this=this, 2912 stored=self._match_text_seq("STORED") and self._parse_stored(), 2913 by_name=self._match_text_seq("BY", "NAME"), 2914 exists=self._parse_exists(), 2915 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2916 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2917 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2918 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2919 conflict=self._parse_on_conflict(), 2920 returning=returning or self._parse_returning(), 2921 overwrite=overwrite, 2922 alternative=alternative, 2923 ignore=ignore, 2924 source=self._match(TokenType.TABLE) and self._parse_table(), 2925 ) 2926 2927 def _parse_kill(self) -> exp.Kill: 2928 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2929 2930 return self.expression( 2931 exp.Kill, 2932 this=self._parse_primary(), 2933 kind=kind, 2934 ) 2935 2936 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2937 conflict = self._match_text_seq("ON", "CONFLICT") 2938 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2939 2940 if not conflict and not duplicate: 2941 return None 2942 2943 conflict_keys = None 2944 constraint = None 2945 2946 if conflict: 2947 if self._match_text_seq("ON", "CONSTRAINT"): 2948 constraint = self._parse_id_var() 2949 elif self._match(TokenType.L_PAREN): 2950 conflict_keys = self._parse_csv(self._parse_id_var) 2951 self._match_r_paren() 2952 2953 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2954 if self._prev.token_type == TokenType.UPDATE: 2955 self._match(TokenType.SET) 2956 expressions = self._parse_csv(self._parse_equality) 2957 else: 2958 expressions = None 2959 2960 return self.expression( 2961 exp.OnConflict, 2962 duplicate=duplicate, 2963 expressions=expressions, 2964 action=action, 2965 conflict_keys=conflict_keys, 2966 constraint=constraint, 2967 where=self._parse_where(), 2968 ) 2969 2970 def _parse_returning(self) -> t.Optional[exp.Returning]: 2971 if not self._match(TokenType.RETURNING): 2972 return None 2973 return self.expression( 2974 exp.Returning, 2975 expressions=self._parse_csv(self._parse_expression), 2976 into=self._match(TokenType.INTO) and self._parse_table_part(), 2977 ) 2978 2979 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2980 if not self._match(TokenType.FORMAT): 2981 return None 2982 return self._parse_row_format() 2983 2984 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2985 index = self._index 2986 with_ = with_ or self._match_text_seq("WITH") 2987 2988 if not self._match(TokenType.SERDE_PROPERTIES): 2989 self._retreat(index) 2990 return None 2991 return self.expression( 2992 exp.SerdeProperties, 2993 **{ # type: ignore 2994 "expressions": self._parse_wrapped_properties(), 2995 "with": with_, 2996 }, 2997 ) 2998 2999 def _parse_row_format( 3000 self, match_row: bool = False 3001 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3002 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3003 return None 3004 3005 if self._match_text_seq("SERDE"): 3006 this = self._parse_string() 3007 3008 serde_properties = self._parse_serde_properties() 3009 3010 return self.expression( 3011 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3012 ) 3013 3014 self._match_text_seq("DELIMITED") 3015 3016 kwargs = {} 3017 3018 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3019 kwargs["fields"] = self._parse_string() 3020 if self._match_text_seq("ESCAPED", "BY"): 3021 kwargs["escaped"] = self._parse_string() 3022 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3023 kwargs["collection_items"] = self._parse_string() 3024 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3025 kwargs["map_keys"] = self._parse_string() 3026 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3027 kwargs["lines"] = self._parse_string() 3028 if self._match_text_seq("NULL", "DEFINED", "AS"): 3029 kwargs["null"] = self._parse_string() 3030 3031 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3032 3033 def _parse_load(self) -> exp.LoadData | exp.Command: 3034 if self._match_text_seq("DATA"): 3035 local = self._match_text_seq("LOCAL") 3036 self._match_text_seq("INPATH") 3037 inpath = self._parse_string() 3038 overwrite = self._match(TokenType.OVERWRITE) 3039 self._match_pair(TokenType.INTO, TokenType.TABLE) 3040 3041 return self.expression( 3042 exp.LoadData, 3043 this=self._parse_table(schema=True), 3044 local=local, 3045 overwrite=overwrite, 3046 inpath=inpath, 3047 partition=self._parse_partition(), 3048 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3049 serde=self._match_text_seq("SERDE") and self._parse_string(), 3050 ) 3051 return self._parse_as_command(self._prev) 3052 3053 def _parse_delete(self) -> exp.Delete: 3054 # This handles MySQL's "Multiple-Table Syntax" 3055 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3056 tables = None 3057 if not self._match(TokenType.FROM, advance=False): 3058 tables = self._parse_csv(self._parse_table) or None 3059 3060 returning = self._parse_returning() 3061 3062 return self.expression( 3063 exp.Delete, 3064 tables=tables, 3065 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3066 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3067 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3068 where=self._parse_where(), 3069 returning=returning or self._parse_returning(), 3070 limit=self._parse_limit(), 3071 ) 3072 3073 def _parse_update(self) -> exp.Update: 3074 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3075 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3076 returning = self._parse_returning() 3077 return self.expression( 3078 exp.Update, 3079 **{ # type: ignore 3080 "this": this, 3081 "expressions": expressions, 3082 "from": self._parse_from(joins=True), 3083 "where": self._parse_where(), 3084 "returning": returning or self._parse_returning(), 3085 "order": self._parse_order(), 3086 "limit": self._parse_limit(), 3087 }, 3088 ) 3089 3090 def _parse_use(self) -> exp.Use: 3091 return self.expression( 3092 exp.Use, 3093 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3094 this=self._parse_table(schema=False), 3095 ) 3096 3097 def _parse_uncache(self) -> exp.Uncache: 3098 if not self._match(TokenType.TABLE): 3099 self.raise_error("Expecting TABLE after UNCACHE") 3100 3101 return self.expression( 3102 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3103 ) 3104 3105 def _parse_cache(self) -> exp.Cache: 3106 lazy = self._match_text_seq("LAZY") 3107 self._match(TokenType.TABLE) 3108 table = self._parse_table(schema=True) 3109 3110 options = [] 3111 if self._match_text_seq("OPTIONS"): 3112 self._match_l_paren() 3113 k = self._parse_string() 3114 self._match(TokenType.EQ) 3115 v = self._parse_string() 3116 options = [k, v] 3117 self._match_r_paren() 3118 3119 self._match(TokenType.ALIAS) 3120 return self.expression( 3121 exp.Cache, 3122 this=table, 3123 lazy=lazy, 3124 options=options, 3125 expression=self._parse_select(nested=True), 3126 ) 3127 3128 def _parse_partition(self) -> t.Optional[exp.Partition]: 3129 if not self._match_texts(self.PARTITION_KEYWORDS): 3130 return None 3131 3132 return self.expression( 3133 exp.Partition, 3134 subpartition=self._prev.text.upper() == "SUBPARTITION", 3135 expressions=self._parse_wrapped_csv(self._parse_assignment), 3136 ) 3137 3138 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3139 def _parse_value_expression() -> t.Optional[exp.Expression]: 3140 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3141 return exp.var(self._prev.text.upper()) 3142 return self._parse_expression() 3143 3144 if self._match(TokenType.L_PAREN): 3145 expressions = self._parse_csv(_parse_value_expression) 3146 self._match_r_paren() 3147 return self.expression(exp.Tuple, expressions=expressions) 3148 3149 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3150 expression = self._parse_expression() 3151 if expression: 3152 return self.expression(exp.Tuple, expressions=[expression]) 3153 return None 3154 3155 def _parse_projections(self) -> t.List[exp.Expression]: 3156 return self._parse_expressions() 3157 3158 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3159 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3160 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3161 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3162 ) 3163 elif self._match(TokenType.FROM): 3164 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3165 # Support parentheses for duckdb FROM-first syntax 3166 select = self._parse_select() 3167 if select: 3168 select.set("from", from_) 3169 this = select 3170 else: 3171 this = exp.select("*").from_(t.cast(exp.From, from_)) 3172 else: 3173 this = ( 3174 self._parse_table(consume_pipe=True) 3175 if table 3176 else self._parse_select(nested=True, parse_set_operation=False) 3177 ) 3178 3179 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3180 # in case a modifier (e.g. join) is following 3181 if table and isinstance(this, exp.Values) and this.alias: 3182 alias = this.args["alias"].pop() 3183 this = exp.Table(this=this, alias=alias) 3184 3185 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3186 3187 return this 3188 3189 def _parse_select( 3190 self, 3191 nested: bool = False, 3192 table: bool = False, 3193 parse_subquery_alias: bool = True, 3194 parse_set_operation: bool = True, 3195 consume_pipe: bool = True, 3196 ) -> t.Optional[exp.Expression]: 3197 query = self._parse_select_query( 3198 nested=nested, 3199 table=table, 3200 parse_subquery_alias=parse_subquery_alias, 3201 parse_set_operation=parse_set_operation, 3202 ) 3203 3204 if ( 3205 consume_pipe 3206 and self._match(TokenType.PIPE_GT, advance=False) 3207 and isinstance(query, exp.Query) 3208 ): 3209 query = self._parse_pipe_syntax_query(query) 3210 query = query.subquery(copy=False) if query and table else query 3211 3212 return query 3213 3214 def _parse_select_query( 3215 self, 3216 nested: bool = False, 3217 table: bool = False, 3218 parse_subquery_alias: bool = True, 3219 parse_set_operation: bool = True, 3220 ) -> t.Optional[exp.Expression]: 3221 cte = self._parse_with() 3222 3223 if cte: 3224 this = self._parse_statement() 3225 3226 if not this: 3227 self.raise_error("Failed to parse any statement following CTE") 3228 return cte 3229 3230 if "with" in this.arg_types: 3231 this.set("with", cte) 3232 else: 3233 self.raise_error(f"{this.key} does not support CTE") 3234 this = cte 3235 3236 return this 3237 3238 # duckdb supports leading with FROM x 3239 from_ = ( 3240 self._parse_from(consume_pipe=True) 3241 if self._match(TokenType.FROM, advance=False) 3242 else None 3243 ) 3244 3245 if self._match(TokenType.SELECT): 3246 comments = self._prev_comments 3247 3248 hint = self._parse_hint() 3249 3250 if self._next and not self._next.token_type == TokenType.DOT: 3251 all_ = self._match(TokenType.ALL) 3252 distinct = self._match_set(self.DISTINCT_TOKENS) 3253 else: 3254 all_, distinct = None, None 3255 3256 kind = ( 3257 self._match(TokenType.ALIAS) 3258 and self._match_texts(("STRUCT", "VALUE")) 3259 and self._prev.text.upper() 3260 ) 3261 3262 if distinct: 3263 distinct = self.expression( 3264 exp.Distinct, 3265 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3266 ) 3267 3268 if all_ and distinct: 3269 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3270 3271 operation_modifiers = [] 3272 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3273 operation_modifiers.append(exp.var(self._prev.text.upper())) 3274 3275 limit = self._parse_limit(top=True) 3276 projections = self._parse_projections() 3277 3278 this = self.expression( 3279 exp.Select, 3280 kind=kind, 3281 hint=hint, 3282 distinct=distinct, 3283 expressions=projections, 3284 limit=limit, 3285 operation_modifiers=operation_modifiers or None, 3286 ) 3287 this.comments = comments 3288 3289 into = self._parse_into() 3290 if into: 3291 this.set("into", into) 3292 3293 if not from_: 3294 from_ = self._parse_from() 3295 3296 if from_: 3297 this.set("from", from_) 3298 3299 this = self._parse_query_modifiers(this) 3300 elif (table or nested) and self._match(TokenType.L_PAREN): 3301 this = self._parse_wrapped_select(table=table) 3302 3303 # We return early here so that the UNION isn't attached to the subquery by the 3304 # following call to _parse_set_operations, but instead becomes the parent node 3305 self._match_r_paren() 3306 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3307 elif self._match(TokenType.VALUES, advance=False): 3308 this = self._parse_derived_table_values() 3309 elif from_: 3310 this = exp.select("*").from_(from_.this, copy=False) 3311 elif self._match(TokenType.SUMMARIZE): 3312 table = self._match(TokenType.TABLE) 3313 this = self._parse_select() or self._parse_string() or self._parse_table() 3314 return self.expression(exp.Summarize, this=this, table=table) 3315 elif self._match(TokenType.DESCRIBE): 3316 this = self._parse_describe() 3317 elif self._match_text_seq("STREAM"): 3318 this = self._parse_function() 3319 if this: 3320 this = self.expression(exp.Stream, this=this) 3321 else: 3322 self._retreat(self._index - 1) 3323 else: 3324 this = None 3325 3326 return self._parse_set_operations(this) if parse_set_operation else this 3327 3328 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3329 self._match_text_seq("SEARCH") 3330 3331 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3332 3333 if not kind: 3334 return None 3335 3336 self._match_text_seq("FIRST", "BY") 3337 3338 return self.expression( 3339 exp.RecursiveWithSearch, 3340 kind=kind, 3341 this=self._parse_id_var(), 3342 expression=self._match_text_seq("SET") and self._parse_id_var(), 3343 using=self._match_text_seq("USING") and self._parse_id_var(), 3344 ) 3345 3346 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3347 if not skip_with_token and not self._match(TokenType.WITH): 3348 return None 3349 3350 comments = self._prev_comments 3351 recursive = self._match(TokenType.RECURSIVE) 3352 3353 last_comments = None 3354 expressions = [] 3355 while True: 3356 cte = self._parse_cte() 3357 if isinstance(cte, exp.CTE): 3358 expressions.append(cte) 3359 if last_comments: 3360 cte.add_comments(last_comments) 3361 3362 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3363 break 3364 else: 3365 self._match(TokenType.WITH) 3366 3367 last_comments = self._prev_comments 3368 3369 return self.expression( 3370 exp.With, 3371 comments=comments, 3372 expressions=expressions, 3373 recursive=recursive, 3374 search=self._parse_recursive_with_search(), 3375 ) 3376 3377 def _parse_cte(self) -> t.Optional[exp.CTE]: 3378 index = self._index 3379 3380 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3381 if not alias or not alias.this: 3382 self.raise_error("Expected CTE to have alias") 3383 3384 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3385 self._retreat(index) 3386 return None 3387 3388 comments = self._prev_comments 3389 3390 if self._match_text_seq("NOT", "MATERIALIZED"): 3391 materialized = False 3392 elif self._match_text_seq("MATERIALIZED"): 3393 materialized = True 3394 else: 3395 materialized = None 3396 3397 cte = self.expression( 3398 exp.CTE, 3399 this=self._parse_wrapped(self._parse_statement), 3400 alias=alias, 3401 materialized=materialized, 3402 comments=comments, 3403 ) 3404 3405 values = cte.this 3406 if isinstance(values, exp.Values): 3407 if values.alias: 3408 cte.set("this", exp.select("*").from_(values)) 3409 else: 3410 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3411 3412 return cte 3413 3414 def _parse_table_alias( 3415 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3416 ) -> t.Optional[exp.TableAlias]: 3417 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3418 # so this section tries to parse the clause version and if it fails, it treats the token 3419 # as an identifier (alias) 3420 if self._can_parse_limit_or_offset(): 3421 return None 3422 3423 any_token = self._match(TokenType.ALIAS) 3424 alias = ( 3425 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3426 or self._parse_string_as_identifier() 3427 ) 3428 3429 index = self._index 3430 if self._match(TokenType.L_PAREN): 3431 columns = self._parse_csv(self._parse_function_parameter) 3432 self._match_r_paren() if columns else self._retreat(index) 3433 else: 3434 columns = None 3435 3436 if not alias and not columns: 3437 return None 3438 3439 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3440 3441 # We bubble up comments from the Identifier to the TableAlias 3442 if isinstance(alias, exp.Identifier): 3443 table_alias.add_comments(alias.pop_comments()) 3444 3445 return table_alias 3446 3447 def _parse_subquery( 3448 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3449 ) -> t.Optional[exp.Subquery]: 3450 if not this: 3451 return None 3452 3453 return self.expression( 3454 exp.Subquery, 3455 this=this, 3456 pivots=self._parse_pivots(), 3457 alias=self._parse_table_alias() if parse_alias else None, 3458 sample=self._parse_table_sample(), 3459 ) 3460 3461 def _implicit_unnests_to_explicit(self, this: E) -> E: 3462 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3463 3464 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3465 for i, join in enumerate(this.args.get("joins") or []): 3466 table = join.this 3467 normalized_table = table.copy() 3468 normalized_table.meta["maybe_column"] = True 3469 normalized_table = _norm(normalized_table, dialect=self.dialect) 3470 3471 if isinstance(table, exp.Table) and not join.args.get("on"): 3472 if normalized_table.parts[0].name in refs: 3473 table_as_column = table.to_column() 3474 unnest = exp.Unnest(expressions=[table_as_column]) 3475 3476 # Table.to_column creates a parent Alias node that we want to convert to 3477 # a TableAlias and attach to the Unnest, so it matches the parser's output 3478 if isinstance(table.args.get("alias"), exp.TableAlias): 3479 table_as_column.replace(table_as_column.this) 3480 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3481 3482 table.replace(unnest) 3483 3484 refs.add(normalized_table.alias_or_name) 3485 3486 return this 3487 3488 def _parse_query_modifiers( 3489 self, this: t.Optional[exp.Expression] 3490 ) -> t.Optional[exp.Expression]: 3491 if isinstance(this, self.MODIFIABLES): 3492 for join in self._parse_joins(): 3493 this.append("joins", join) 3494 for lateral in iter(self._parse_lateral, None): 3495 this.append("laterals", lateral) 3496 3497 while True: 3498 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3499 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3500 key, expression = parser(self) 3501 3502 if expression: 3503 this.set(key, expression) 3504 if key == "limit": 3505 offset = expression.args.pop("offset", None) 3506 3507 if offset: 3508 offset = exp.Offset(expression=offset) 3509 this.set("offset", offset) 3510 3511 limit_by_expressions = expression.expressions 3512 expression.set("expressions", None) 3513 offset.set("expressions", limit_by_expressions) 3514 continue 3515 break 3516 3517 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3518 this = self._implicit_unnests_to_explicit(this) 3519 3520 return this 3521 3522 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3523 start = self._curr 3524 while self._curr: 3525 self._advance() 3526 3527 end = self._tokens[self._index - 1] 3528 return exp.Hint(expressions=[self._find_sql(start, end)]) 3529 3530 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3531 return self._parse_function_call() 3532 3533 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3534 start_index = self._index 3535 should_fallback_to_string = False 3536 3537 hints = [] 3538 try: 3539 for hint in iter( 3540 lambda: self._parse_csv( 3541 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3542 ), 3543 [], 3544 ): 3545 hints.extend(hint) 3546 except ParseError: 3547 should_fallback_to_string = True 3548 3549 if should_fallback_to_string or self._curr: 3550 self._retreat(start_index) 3551 return self._parse_hint_fallback_to_string() 3552 3553 return self.expression(exp.Hint, expressions=hints) 3554 3555 def _parse_hint(self) -> t.Optional[exp.Hint]: 3556 if self._match(TokenType.HINT) and self._prev_comments: 3557 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3558 3559 return None 3560 3561 def _parse_into(self) -> t.Optional[exp.Into]: 3562 if not self._match(TokenType.INTO): 3563 return None 3564 3565 temp = self._match(TokenType.TEMPORARY) 3566 unlogged = self._match_text_seq("UNLOGGED") 3567 self._match(TokenType.TABLE) 3568 3569 return self.expression( 3570 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3571 ) 3572 3573 def _parse_from( 3574 self, 3575 joins: bool = False, 3576 skip_from_token: bool = False, 3577 consume_pipe: bool = False, 3578 ) -> t.Optional[exp.From]: 3579 if not skip_from_token and not self._match(TokenType.FROM): 3580 return None 3581 3582 return self.expression( 3583 exp.From, 3584 comments=self._prev_comments, 3585 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3586 ) 3587 3588 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3589 return self.expression( 3590 exp.MatchRecognizeMeasure, 3591 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3592 this=self._parse_expression(), 3593 ) 3594 3595 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3596 if not self._match(TokenType.MATCH_RECOGNIZE): 3597 return None 3598 3599 self._match_l_paren() 3600 3601 partition = self._parse_partition_by() 3602 order = self._parse_order() 3603 3604 measures = ( 3605 self._parse_csv(self._parse_match_recognize_measure) 3606 if self._match_text_seq("MEASURES") 3607 else None 3608 ) 3609 3610 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3611 rows = exp.var("ONE ROW PER MATCH") 3612 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3613 text = "ALL ROWS PER MATCH" 3614 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3615 text += " SHOW EMPTY MATCHES" 3616 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3617 text += " OMIT EMPTY MATCHES" 3618 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3619 text += " WITH UNMATCHED ROWS" 3620 rows = exp.var(text) 3621 else: 3622 rows = None 3623 3624 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3625 text = "AFTER MATCH SKIP" 3626 if self._match_text_seq("PAST", "LAST", "ROW"): 3627 text += " PAST LAST ROW" 3628 elif self._match_text_seq("TO", "NEXT", "ROW"): 3629 text += " TO NEXT ROW" 3630 elif self._match_text_seq("TO", "FIRST"): 3631 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3632 elif self._match_text_seq("TO", "LAST"): 3633 text += f" TO LAST {self._advance_any().text}" # type: ignore 3634 after = exp.var(text) 3635 else: 3636 after = None 3637 3638 if self._match_text_seq("PATTERN"): 3639 self._match_l_paren() 3640 3641 if not self._curr: 3642 self.raise_error("Expecting )", self._curr) 3643 3644 paren = 1 3645 start = self._curr 3646 3647 while self._curr and paren > 0: 3648 if self._curr.token_type == TokenType.L_PAREN: 3649 paren += 1 3650 if self._curr.token_type == TokenType.R_PAREN: 3651 paren -= 1 3652 3653 end = self._prev 3654 self._advance() 3655 3656 if paren > 0: 3657 self.raise_error("Expecting )", self._curr) 3658 3659 pattern = exp.var(self._find_sql(start, end)) 3660 else: 3661 pattern = None 3662 3663 define = ( 3664 self._parse_csv(self._parse_name_as_expression) 3665 if self._match_text_seq("DEFINE") 3666 else None 3667 ) 3668 3669 self._match_r_paren() 3670 3671 return self.expression( 3672 exp.MatchRecognize, 3673 partition_by=partition, 3674 order=order, 3675 measures=measures, 3676 rows=rows, 3677 after=after, 3678 pattern=pattern, 3679 define=define, 3680 alias=self._parse_table_alias(), 3681 ) 3682 3683 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3684 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3685 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3686 cross_apply = False 3687 3688 if cross_apply is not None: 3689 this = self._parse_select(table=True) 3690 view = None 3691 outer = None 3692 elif self._match(TokenType.LATERAL): 3693 this = self._parse_select(table=True) 3694 view = self._match(TokenType.VIEW) 3695 outer = self._match(TokenType.OUTER) 3696 else: 3697 return None 3698 3699 if not this: 3700 this = ( 3701 self._parse_unnest() 3702 or self._parse_function() 3703 or self._parse_id_var(any_token=False) 3704 ) 3705 3706 while self._match(TokenType.DOT): 3707 this = exp.Dot( 3708 this=this, 3709 expression=self._parse_function() or self._parse_id_var(any_token=False), 3710 ) 3711 3712 ordinality: t.Optional[bool] = None 3713 3714 if view: 3715 table = self._parse_id_var(any_token=False) 3716 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3717 table_alias: t.Optional[exp.TableAlias] = self.expression( 3718 exp.TableAlias, this=table, columns=columns 3719 ) 3720 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3721 # We move the alias from the lateral's child node to the lateral itself 3722 table_alias = this.args["alias"].pop() 3723 else: 3724 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3725 table_alias = self._parse_table_alias() 3726 3727 return self.expression( 3728 exp.Lateral, 3729 this=this, 3730 view=view, 3731 outer=outer, 3732 alias=table_alias, 3733 cross_apply=cross_apply, 3734 ordinality=ordinality, 3735 ) 3736 3737 def _parse_join_parts( 3738 self, 3739 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3740 return ( 3741 self._match_set(self.JOIN_METHODS) and self._prev, 3742 self._match_set(self.JOIN_SIDES) and self._prev, 3743 self._match_set(self.JOIN_KINDS) and self._prev, 3744 ) 3745 3746 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3747 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3748 this = self._parse_column() 3749 if isinstance(this, exp.Column): 3750 return this.this 3751 return this 3752 3753 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3754 3755 def _parse_join( 3756 self, skip_join_token: bool = False, parse_bracket: bool = False 3757 ) -> t.Optional[exp.Join]: 3758 if self._match(TokenType.COMMA): 3759 table = self._try_parse(self._parse_table) 3760 cross_join = self.expression(exp.Join, this=table) if table else None 3761 3762 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3763 cross_join.set("kind", "CROSS") 3764 3765 return cross_join 3766 3767 index = self._index 3768 method, side, kind = self._parse_join_parts() 3769 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3770 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3771 join_comments = self._prev_comments 3772 3773 if not skip_join_token and not join: 3774 self._retreat(index) 3775 kind = None 3776 method = None 3777 side = None 3778 3779 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3780 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3781 3782 if not skip_join_token and not join and not outer_apply and not cross_apply: 3783 return None 3784 3785 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3786 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3787 kwargs["expressions"] = self._parse_csv( 3788 lambda: self._parse_table(parse_bracket=parse_bracket) 3789 ) 3790 3791 if method: 3792 kwargs["method"] = method.text 3793 if side: 3794 kwargs["side"] = side.text 3795 if kind: 3796 kwargs["kind"] = kind.text 3797 if hint: 3798 kwargs["hint"] = hint 3799 3800 if self._match(TokenType.MATCH_CONDITION): 3801 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3802 3803 if self._match(TokenType.ON): 3804 kwargs["on"] = self._parse_assignment() 3805 elif self._match(TokenType.USING): 3806 kwargs["using"] = self._parse_using_identifiers() 3807 elif ( 3808 not (outer_apply or cross_apply) 3809 and not isinstance(kwargs["this"], exp.Unnest) 3810 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3811 ): 3812 index = self._index 3813 joins: t.Optional[list] = list(self._parse_joins()) 3814 3815 if joins and self._match(TokenType.ON): 3816 kwargs["on"] = self._parse_assignment() 3817 elif joins and self._match(TokenType.USING): 3818 kwargs["using"] = self._parse_using_identifiers() 3819 else: 3820 joins = None 3821 self._retreat(index) 3822 3823 kwargs["this"].set("joins", joins if joins else None) 3824 3825 kwargs["pivots"] = self._parse_pivots() 3826 3827 comments = [c for token in (method, side, kind) if token for c in token.comments] 3828 comments = (join_comments or []) + comments 3829 return self.expression(exp.Join, comments=comments, **kwargs) 3830 3831 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3832 this = self._parse_assignment() 3833 3834 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3835 return this 3836 3837 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3838 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3839 3840 return this 3841 3842 def _parse_index_params(self) -> exp.IndexParameters: 3843 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3844 3845 if self._match(TokenType.L_PAREN, advance=False): 3846 columns = self._parse_wrapped_csv(self._parse_with_operator) 3847 else: 3848 columns = None 3849 3850 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3851 partition_by = self._parse_partition_by() 3852 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3853 tablespace = ( 3854 self._parse_var(any_token=True) 3855 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3856 else None 3857 ) 3858 where = self._parse_where() 3859 3860 on = self._parse_field() if self._match(TokenType.ON) else None 3861 3862 return self.expression( 3863 exp.IndexParameters, 3864 using=using, 3865 columns=columns, 3866 include=include, 3867 partition_by=partition_by, 3868 where=where, 3869 with_storage=with_storage, 3870 tablespace=tablespace, 3871 on=on, 3872 ) 3873 3874 def _parse_index( 3875 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3876 ) -> t.Optional[exp.Index]: 3877 if index or anonymous: 3878 unique = None 3879 primary = None 3880 amp = None 3881 3882 self._match(TokenType.ON) 3883 self._match(TokenType.TABLE) # hive 3884 table = self._parse_table_parts(schema=True) 3885 else: 3886 unique = self._match(TokenType.UNIQUE) 3887 primary = self._match_text_seq("PRIMARY") 3888 amp = self._match_text_seq("AMP") 3889 3890 if not self._match(TokenType.INDEX): 3891 return None 3892 3893 index = self._parse_id_var() 3894 table = None 3895 3896 params = self._parse_index_params() 3897 3898 return self.expression( 3899 exp.Index, 3900 this=index, 3901 table=table, 3902 unique=unique, 3903 primary=primary, 3904 amp=amp, 3905 params=params, 3906 ) 3907 3908 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3909 hints: t.List[exp.Expression] = [] 3910 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3911 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3912 hints.append( 3913 self.expression( 3914 exp.WithTableHint, 3915 expressions=self._parse_csv( 3916 lambda: self._parse_function() or self._parse_var(any_token=True) 3917 ), 3918 ) 3919 ) 3920 self._match_r_paren() 3921 else: 3922 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3923 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3924 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3925 3926 self._match_set((TokenType.INDEX, TokenType.KEY)) 3927 if self._match(TokenType.FOR): 3928 hint.set("target", self._advance_any() and self._prev.text.upper()) 3929 3930 hint.set("expressions", self._parse_wrapped_id_vars()) 3931 hints.append(hint) 3932 3933 return hints or None 3934 3935 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3936 return ( 3937 (not schema and self._parse_function(optional_parens=False)) 3938 or self._parse_id_var(any_token=False) 3939 or self._parse_string_as_identifier() 3940 or self._parse_placeholder() 3941 ) 3942 3943 def _parse_table_parts( 3944 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3945 ) -> exp.Table: 3946 catalog = None 3947 db = None 3948 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3949 3950 while self._match(TokenType.DOT): 3951 if catalog: 3952 # This allows nesting the table in arbitrarily many dot expressions if needed 3953 table = self.expression( 3954 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3955 ) 3956 else: 3957 catalog = db 3958 db = table 3959 # "" used for tsql FROM a..b case 3960 table = self._parse_table_part(schema=schema) or "" 3961 3962 if ( 3963 wildcard 3964 and self._is_connected() 3965 and (isinstance(table, exp.Identifier) or not table) 3966 and self._match(TokenType.STAR) 3967 ): 3968 if isinstance(table, exp.Identifier): 3969 table.args["this"] += "*" 3970 else: 3971 table = exp.Identifier(this="*") 3972 3973 # We bubble up comments from the Identifier to the Table 3974 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3975 3976 if is_db_reference: 3977 catalog = db 3978 db = table 3979 table = None 3980 3981 if not table and not is_db_reference: 3982 self.raise_error(f"Expected table name but got {self._curr}") 3983 if not db and is_db_reference: 3984 self.raise_error(f"Expected database name but got {self._curr}") 3985 3986 table = self.expression( 3987 exp.Table, 3988 comments=comments, 3989 this=table, 3990 db=db, 3991 catalog=catalog, 3992 ) 3993 3994 changes = self._parse_changes() 3995 if changes: 3996 table.set("changes", changes) 3997 3998 at_before = self._parse_historical_data() 3999 if at_before: 4000 table.set("when", at_before) 4001 4002 pivots = self._parse_pivots() 4003 if pivots: 4004 table.set("pivots", pivots) 4005 4006 return table 4007 4008 def _parse_table( 4009 self, 4010 schema: bool = False, 4011 joins: bool = False, 4012 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4013 parse_bracket: bool = False, 4014 is_db_reference: bool = False, 4015 parse_partition: bool = False, 4016 consume_pipe: bool = False, 4017 ) -> t.Optional[exp.Expression]: 4018 lateral = self._parse_lateral() 4019 if lateral: 4020 return lateral 4021 4022 unnest = self._parse_unnest() 4023 if unnest: 4024 return unnest 4025 4026 values = self._parse_derived_table_values() 4027 if values: 4028 return values 4029 4030 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4031 if subquery: 4032 if not subquery.args.get("pivots"): 4033 subquery.set("pivots", self._parse_pivots()) 4034 return subquery 4035 4036 bracket = parse_bracket and self._parse_bracket(None) 4037 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4038 4039 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4040 self._parse_table 4041 ) 4042 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4043 4044 only = self._match(TokenType.ONLY) 4045 4046 this = t.cast( 4047 exp.Expression, 4048 bracket 4049 or rows_from 4050 or self._parse_bracket( 4051 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4052 ), 4053 ) 4054 4055 if only: 4056 this.set("only", only) 4057 4058 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4059 self._match_text_seq("*") 4060 4061 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4062 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4063 this.set("partition", self._parse_partition()) 4064 4065 if schema: 4066 return self._parse_schema(this=this) 4067 4068 version = self._parse_version() 4069 4070 if version: 4071 this.set("version", version) 4072 4073 if self.dialect.ALIAS_POST_TABLESAMPLE: 4074 this.set("sample", self._parse_table_sample()) 4075 4076 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4077 if alias: 4078 this.set("alias", alias) 4079 4080 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4081 return self.expression( 4082 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4083 ) 4084 4085 this.set("hints", self._parse_table_hints()) 4086 4087 if not this.args.get("pivots"): 4088 this.set("pivots", self._parse_pivots()) 4089 4090 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4091 this.set("sample", self._parse_table_sample()) 4092 4093 if joins: 4094 for join in self._parse_joins(): 4095 this.append("joins", join) 4096 4097 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4098 this.set("ordinality", True) 4099 this.set("alias", self._parse_table_alias()) 4100 4101 return this 4102 4103 def _parse_version(self) -> t.Optional[exp.Version]: 4104 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4105 this = "TIMESTAMP" 4106 elif self._match(TokenType.VERSION_SNAPSHOT): 4107 this = "VERSION" 4108 else: 4109 return None 4110 4111 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4112 kind = self._prev.text.upper() 4113 start = self._parse_bitwise() 4114 self._match_texts(("TO", "AND")) 4115 end = self._parse_bitwise() 4116 expression: t.Optional[exp.Expression] = self.expression( 4117 exp.Tuple, expressions=[start, end] 4118 ) 4119 elif self._match_text_seq("CONTAINED", "IN"): 4120 kind = "CONTAINED IN" 4121 expression = self.expression( 4122 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4123 ) 4124 elif self._match(TokenType.ALL): 4125 kind = "ALL" 4126 expression = None 4127 else: 4128 self._match_text_seq("AS", "OF") 4129 kind = "AS OF" 4130 expression = self._parse_type() 4131 4132 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4133 4134 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4135 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4136 index = self._index 4137 historical_data = None 4138 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4139 this = self._prev.text.upper() 4140 kind = ( 4141 self._match(TokenType.L_PAREN) 4142 and self._match_texts(self.HISTORICAL_DATA_KIND) 4143 and self._prev.text.upper() 4144 ) 4145 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4146 4147 if expression: 4148 self._match_r_paren() 4149 historical_data = self.expression( 4150 exp.HistoricalData, this=this, kind=kind, expression=expression 4151 ) 4152 else: 4153 self._retreat(index) 4154 4155 return historical_data 4156 4157 def _parse_changes(self) -> t.Optional[exp.Changes]: 4158 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4159 return None 4160 4161 information = self._parse_var(any_token=True) 4162 self._match_r_paren() 4163 4164 return self.expression( 4165 exp.Changes, 4166 information=information, 4167 at_before=self._parse_historical_data(), 4168 end=self._parse_historical_data(), 4169 ) 4170 4171 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4172 if not self._match(TokenType.UNNEST): 4173 return None 4174 4175 expressions = self._parse_wrapped_csv(self._parse_equality) 4176 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4177 4178 alias = self._parse_table_alias() if with_alias else None 4179 4180 if alias: 4181 if self.dialect.UNNEST_COLUMN_ONLY: 4182 if alias.args.get("columns"): 4183 self.raise_error("Unexpected extra column alias in unnest.") 4184 4185 alias.set("columns", [alias.this]) 4186 alias.set("this", None) 4187 4188 columns = alias.args.get("columns") or [] 4189 if offset and len(expressions) < len(columns): 4190 offset = columns.pop() 4191 4192 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4193 self._match(TokenType.ALIAS) 4194 offset = self._parse_id_var( 4195 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4196 ) or exp.to_identifier("offset") 4197 4198 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4199 4200 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4201 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4202 if not is_derived and not ( 4203 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4204 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4205 ): 4206 return None 4207 4208 expressions = self._parse_csv(self._parse_value) 4209 alias = self._parse_table_alias() 4210 4211 if is_derived: 4212 self._match_r_paren() 4213 4214 return self.expression( 4215 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4216 ) 4217 4218 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4219 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4220 as_modifier and self._match_text_seq("USING", "SAMPLE") 4221 ): 4222 return None 4223 4224 bucket_numerator = None 4225 bucket_denominator = None 4226 bucket_field = None 4227 percent = None 4228 size = None 4229 seed = None 4230 4231 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4232 matched_l_paren = self._match(TokenType.L_PAREN) 4233 4234 if self.TABLESAMPLE_CSV: 4235 num = None 4236 expressions = self._parse_csv(self._parse_primary) 4237 else: 4238 expressions = None 4239 num = ( 4240 self._parse_factor() 4241 if self._match(TokenType.NUMBER, advance=False) 4242 else self._parse_primary() or self._parse_placeholder() 4243 ) 4244 4245 if self._match_text_seq("BUCKET"): 4246 bucket_numerator = self._parse_number() 4247 self._match_text_seq("OUT", "OF") 4248 bucket_denominator = bucket_denominator = self._parse_number() 4249 self._match(TokenType.ON) 4250 bucket_field = self._parse_field() 4251 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4252 percent = num 4253 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4254 size = num 4255 else: 4256 percent = num 4257 4258 if matched_l_paren: 4259 self._match_r_paren() 4260 4261 if self._match(TokenType.L_PAREN): 4262 method = self._parse_var(upper=True) 4263 seed = self._match(TokenType.COMMA) and self._parse_number() 4264 self._match_r_paren() 4265 elif self._match_texts(("SEED", "REPEATABLE")): 4266 seed = self._parse_wrapped(self._parse_number) 4267 4268 if not method and self.DEFAULT_SAMPLING_METHOD: 4269 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4270 4271 return self.expression( 4272 exp.TableSample, 4273 expressions=expressions, 4274 method=method, 4275 bucket_numerator=bucket_numerator, 4276 bucket_denominator=bucket_denominator, 4277 bucket_field=bucket_field, 4278 percent=percent, 4279 size=size, 4280 seed=seed, 4281 ) 4282 4283 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4284 return list(iter(self._parse_pivot, None)) or None 4285 4286 def _parse_joins(self) -> t.Iterator[exp.Join]: 4287 return iter(self._parse_join, None) 4288 4289 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4290 if not self._match(TokenType.INTO): 4291 return None 4292 4293 return self.expression( 4294 exp.UnpivotColumns, 4295 this=self._match_text_seq("NAME") and self._parse_column(), 4296 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4297 ) 4298 4299 # https://duckdb.org/docs/sql/statements/pivot 4300 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4301 def _parse_on() -> t.Optional[exp.Expression]: 4302 this = self._parse_bitwise() 4303 4304 if self._match(TokenType.IN): 4305 # PIVOT ... ON col IN (row_val1, row_val2) 4306 return self._parse_in(this) 4307 if self._match(TokenType.ALIAS, advance=False): 4308 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4309 return self._parse_alias(this) 4310 4311 return this 4312 4313 this = self._parse_table() 4314 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4315 into = self._parse_unpivot_columns() 4316 using = self._match(TokenType.USING) and self._parse_csv( 4317 lambda: self._parse_alias(self._parse_function()) 4318 ) 4319 group = self._parse_group() 4320 4321 return self.expression( 4322 exp.Pivot, 4323 this=this, 4324 expressions=expressions, 4325 using=using, 4326 group=group, 4327 unpivot=is_unpivot, 4328 into=into, 4329 ) 4330 4331 def _parse_pivot_in(self) -> exp.In: 4332 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4333 this = self._parse_select_or_expression() 4334 4335 self._match(TokenType.ALIAS) 4336 alias = self._parse_bitwise() 4337 if alias: 4338 if isinstance(alias, exp.Column) and not alias.db: 4339 alias = alias.this 4340 return self.expression(exp.PivotAlias, this=this, alias=alias) 4341 4342 return this 4343 4344 value = self._parse_column() 4345 4346 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4347 self.raise_error("Expecting IN (") 4348 4349 if self._match(TokenType.ANY): 4350 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4351 else: 4352 exprs = self._parse_csv(_parse_aliased_expression) 4353 4354 self._match_r_paren() 4355 return self.expression(exp.In, this=value, expressions=exprs) 4356 4357 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4358 func = self._parse_function() 4359 if not func: 4360 self.raise_error("Expecting an aggregation function in PIVOT") 4361 4362 return self._parse_alias(func) 4363 4364 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4365 index = self._index 4366 include_nulls = None 4367 4368 if self._match(TokenType.PIVOT): 4369 unpivot = False 4370 elif self._match(TokenType.UNPIVOT): 4371 unpivot = True 4372 4373 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4374 if self._match_text_seq("INCLUDE", "NULLS"): 4375 include_nulls = True 4376 elif self._match_text_seq("EXCLUDE", "NULLS"): 4377 include_nulls = False 4378 else: 4379 return None 4380 4381 expressions = [] 4382 4383 if not self._match(TokenType.L_PAREN): 4384 self._retreat(index) 4385 return None 4386 4387 if unpivot: 4388 expressions = self._parse_csv(self._parse_column) 4389 else: 4390 expressions = self._parse_csv(self._parse_pivot_aggregation) 4391 4392 if not expressions: 4393 self.raise_error("Failed to parse PIVOT's aggregation list") 4394 4395 if not self._match(TokenType.FOR): 4396 self.raise_error("Expecting FOR") 4397 4398 fields = [] 4399 while True: 4400 field = self._try_parse(self._parse_pivot_in) 4401 if not field: 4402 break 4403 fields.append(field) 4404 4405 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4406 self._parse_bitwise 4407 ) 4408 4409 group = self._parse_group() 4410 4411 self._match_r_paren() 4412 4413 pivot = self.expression( 4414 exp.Pivot, 4415 expressions=expressions, 4416 fields=fields, 4417 unpivot=unpivot, 4418 include_nulls=include_nulls, 4419 default_on_null=default_on_null, 4420 group=group, 4421 ) 4422 4423 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4424 pivot.set("alias", self._parse_table_alias()) 4425 4426 if not unpivot: 4427 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4428 4429 columns: t.List[exp.Expression] = [] 4430 all_fields = [] 4431 for pivot_field in pivot.fields: 4432 pivot_field_expressions = pivot_field.expressions 4433 4434 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4435 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4436 continue 4437 4438 all_fields.append( 4439 [ 4440 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4441 for fld in pivot_field_expressions 4442 ] 4443 ) 4444 4445 if all_fields: 4446 if names: 4447 all_fields.append(names) 4448 4449 # Generate all possible combinations of the pivot columns 4450 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4451 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4452 for fld_parts_tuple in itertools.product(*all_fields): 4453 fld_parts = list(fld_parts_tuple) 4454 4455 if names and self.PREFIXED_PIVOT_COLUMNS: 4456 # Move the "name" to the front of the list 4457 fld_parts.insert(0, fld_parts.pop(-1)) 4458 4459 columns.append(exp.to_identifier("_".join(fld_parts))) 4460 4461 pivot.set("columns", columns) 4462 4463 return pivot 4464 4465 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4466 return [agg.alias for agg in aggregations if agg.alias] 4467 4468 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4469 if not skip_where_token and not self._match(TokenType.PREWHERE): 4470 return None 4471 4472 return self.expression( 4473 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4474 ) 4475 4476 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4477 if not skip_where_token and not self._match(TokenType.WHERE): 4478 return None 4479 4480 return self.expression( 4481 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4482 ) 4483 4484 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4485 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4486 return None 4487 comments = self._prev_comments 4488 4489 elements: t.Dict[str, t.Any] = defaultdict(list) 4490 4491 if self._match(TokenType.ALL): 4492 elements["all"] = True 4493 elif self._match(TokenType.DISTINCT): 4494 elements["all"] = False 4495 4496 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4497 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4498 4499 while True: 4500 index = self._index 4501 4502 elements["expressions"].extend( 4503 self._parse_csv( 4504 lambda: None 4505 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4506 else self._parse_assignment() 4507 ) 4508 ) 4509 4510 before_with_index = self._index 4511 with_prefix = self._match(TokenType.WITH) 4512 4513 if self._match(TokenType.ROLLUP): 4514 elements["rollup"].append( 4515 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4516 ) 4517 elif self._match(TokenType.CUBE): 4518 elements["cube"].append( 4519 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4520 ) 4521 elif self._match(TokenType.GROUPING_SETS): 4522 elements["grouping_sets"].append( 4523 self.expression( 4524 exp.GroupingSets, 4525 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4526 ) 4527 ) 4528 elif self._match_text_seq("TOTALS"): 4529 elements["totals"] = True # type: ignore 4530 4531 if before_with_index <= self._index <= before_with_index + 1: 4532 self._retreat(before_with_index) 4533 break 4534 4535 if index == self._index: 4536 break 4537 4538 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4539 4540 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4541 return self.expression( 4542 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4543 ) 4544 4545 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4546 if self._match(TokenType.L_PAREN): 4547 grouping_set = self._parse_csv(self._parse_column) 4548 self._match_r_paren() 4549 return self.expression(exp.Tuple, expressions=grouping_set) 4550 4551 return self._parse_column() 4552 4553 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4554 if not skip_having_token and not self._match(TokenType.HAVING): 4555 return None 4556 return self.expression( 4557 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4558 ) 4559 4560 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4561 if not self._match(TokenType.QUALIFY): 4562 return None 4563 return self.expression(exp.Qualify, this=self._parse_assignment()) 4564 4565 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4566 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4567 exp.Prior, this=self._parse_bitwise() 4568 ) 4569 connect = self._parse_assignment() 4570 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4571 return connect 4572 4573 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4574 if skip_start_token: 4575 start = None 4576 elif self._match(TokenType.START_WITH): 4577 start = self._parse_assignment() 4578 else: 4579 return None 4580 4581 self._match(TokenType.CONNECT_BY) 4582 nocycle = self._match_text_seq("NOCYCLE") 4583 connect = self._parse_connect_with_prior() 4584 4585 if not start and self._match(TokenType.START_WITH): 4586 start = self._parse_assignment() 4587 4588 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4589 4590 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4591 this = self._parse_id_var(any_token=True) 4592 if self._match(TokenType.ALIAS): 4593 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4594 return this 4595 4596 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4597 if self._match_text_seq("INTERPOLATE"): 4598 return self._parse_wrapped_csv(self._parse_name_as_expression) 4599 return None 4600 4601 def _parse_order( 4602 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4603 ) -> t.Optional[exp.Expression]: 4604 siblings = None 4605 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4606 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4607 return this 4608 4609 siblings = True 4610 4611 return self.expression( 4612 exp.Order, 4613 comments=self._prev_comments, 4614 this=this, 4615 expressions=self._parse_csv(self._parse_ordered), 4616 siblings=siblings, 4617 ) 4618 4619 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4620 if not self._match(token): 4621 return None 4622 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4623 4624 def _parse_ordered( 4625 self, parse_method: t.Optional[t.Callable] = None 4626 ) -> t.Optional[exp.Ordered]: 4627 this = parse_method() if parse_method else self._parse_assignment() 4628 if not this: 4629 return None 4630 4631 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4632 this = exp.var("ALL") 4633 4634 asc = self._match(TokenType.ASC) 4635 desc = self._match(TokenType.DESC) or (asc and False) 4636 4637 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4638 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4639 4640 nulls_first = is_nulls_first or False 4641 explicitly_null_ordered = is_nulls_first or is_nulls_last 4642 4643 if ( 4644 not explicitly_null_ordered 4645 and ( 4646 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4647 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4648 ) 4649 and self.dialect.NULL_ORDERING != "nulls_are_last" 4650 ): 4651 nulls_first = True 4652 4653 if self._match_text_seq("WITH", "FILL"): 4654 with_fill = self.expression( 4655 exp.WithFill, 4656 **{ # type: ignore 4657 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4658 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4659 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4660 "interpolate": self._parse_interpolate(), 4661 }, 4662 ) 4663 else: 4664 with_fill = None 4665 4666 return self.expression( 4667 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4668 ) 4669 4670 def _parse_limit_options(self) -> exp.LimitOptions: 4671 percent = self._match(TokenType.PERCENT) 4672 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4673 self._match_text_seq("ONLY") 4674 with_ties = self._match_text_seq("WITH", "TIES") 4675 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4676 4677 def _parse_limit( 4678 self, 4679 this: t.Optional[exp.Expression] = None, 4680 top: bool = False, 4681 skip_limit_token: bool = False, 4682 ) -> t.Optional[exp.Expression]: 4683 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4684 comments = self._prev_comments 4685 if top: 4686 limit_paren = self._match(TokenType.L_PAREN) 4687 expression = self._parse_term() if limit_paren else self._parse_number() 4688 4689 if limit_paren: 4690 self._match_r_paren() 4691 4692 limit_options = self._parse_limit_options() 4693 else: 4694 limit_options = None 4695 expression = self._parse_term() 4696 4697 if self._match(TokenType.COMMA): 4698 offset = expression 4699 expression = self._parse_term() 4700 else: 4701 offset = None 4702 4703 limit_exp = self.expression( 4704 exp.Limit, 4705 this=this, 4706 expression=expression, 4707 offset=offset, 4708 comments=comments, 4709 limit_options=limit_options, 4710 expressions=self._parse_limit_by(), 4711 ) 4712 4713 return limit_exp 4714 4715 if self._match(TokenType.FETCH): 4716 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4717 direction = self._prev.text.upper() if direction else "FIRST" 4718 4719 count = self._parse_field(tokens=self.FETCH_TOKENS) 4720 4721 return self.expression( 4722 exp.Fetch, 4723 direction=direction, 4724 count=count, 4725 limit_options=self._parse_limit_options(), 4726 ) 4727 4728 return this 4729 4730 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4731 if not self._match(TokenType.OFFSET): 4732 return this 4733 4734 count = self._parse_term() 4735 self._match_set((TokenType.ROW, TokenType.ROWS)) 4736 4737 return self.expression( 4738 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4739 ) 4740 4741 def _can_parse_limit_or_offset(self) -> bool: 4742 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4743 return False 4744 4745 index = self._index 4746 result = bool( 4747 self._try_parse(self._parse_limit, retreat=True) 4748 or self._try_parse(self._parse_offset, retreat=True) 4749 ) 4750 self._retreat(index) 4751 return result 4752 4753 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4754 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4755 4756 def _parse_locks(self) -> t.List[exp.Lock]: 4757 locks = [] 4758 while True: 4759 update, key = None, None 4760 if self._match_text_seq("FOR", "UPDATE"): 4761 update = True 4762 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4763 "LOCK", "IN", "SHARE", "MODE" 4764 ): 4765 update = False 4766 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4767 update, key = False, True 4768 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4769 update, key = True, True 4770 else: 4771 break 4772 4773 expressions = None 4774 if self._match_text_seq("OF"): 4775 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4776 4777 wait: t.Optional[bool | exp.Expression] = None 4778 if self._match_text_seq("NOWAIT"): 4779 wait = True 4780 elif self._match_text_seq("WAIT"): 4781 wait = self._parse_primary() 4782 elif self._match_text_seq("SKIP", "LOCKED"): 4783 wait = False 4784 4785 locks.append( 4786 self.expression( 4787 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4788 ) 4789 ) 4790 4791 return locks 4792 4793 def parse_set_operation( 4794 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4795 ) -> t.Optional[exp.Expression]: 4796 start = self._index 4797 _, side_token, kind_token = self._parse_join_parts() 4798 4799 side = side_token.text if side_token else None 4800 kind = kind_token.text if kind_token else None 4801 4802 if not self._match_set(self.SET_OPERATIONS): 4803 self._retreat(start) 4804 return None 4805 4806 token_type = self._prev.token_type 4807 4808 if token_type == TokenType.UNION: 4809 operation: t.Type[exp.SetOperation] = exp.Union 4810 elif token_type == TokenType.EXCEPT: 4811 operation = exp.Except 4812 else: 4813 operation = exp.Intersect 4814 4815 comments = self._prev.comments 4816 4817 if self._match(TokenType.DISTINCT): 4818 distinct: t.Optional[bool] = True 4819 elif self._match(TokenType.ALL): 4820 distinct = False 4821 else: 4822 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4823 if distinct is None: 4824 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4825 4826 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4827 "STRICT", "CORRESPONDING" 4828 ) 4829 if self._match_text_seq("CORRESPONDING"): 4830 by_name = True 4831 if not side and not kind: 4832 kind = "INNER" 4833 4834 on_column_list = None 4835 if by_name and self._match_texts(("ON", "BY")): 4836 on_column_list = self._parse_wrapped_csv(self._parse_column) 4837 4838 expression = self._parse_select( 4839 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4840 ) 4841 4842 return self.expression( 4843 operation, 4844 comments=comments, 4845 this=this, 4846 distinct=distinct, 4847 by_name=by_name, 4848 expression=expression, 4849 side=side, 4850 kind=kind, 4851 on=on_column_list, 4852 ) 4853 4854 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4855 while this: 4856 setop = self.parse_set_operation(this) 4857 if not setop: 4858 break 4859 this = setop 4860 4861 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4862 expression = this.expression 4863 4864 if expression: 4865 for arg in self.SET_OP_MODIFIERS: 4866 expr = expression.args.get(arg) 4867 if expr: 4868 this.set(arg, expr.pop()) 4869 4870 return this 4871 4872 def _parse_expression(self) -> t.Optional[exp.Expression]: 4873 return self._parse_alias(self._parse_assignment()) 4874 4875 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4876 this = self._parse_disjunction() 4877 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4878 # This allows us to parse <non-identifier token> := <expr> 4879 this = exp.column( 4880 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4881 ) 4882 4883 while self._match_set(self.ASSIGNMENT): 4884 if isinstance(this, exp.Column) and len(this.parts) == 1: 4885 this = this.this 4886 4887 this = self.expression( 4888 self.ASSIGNMENT[self._prev.token_type], 4889 this=this, 4890 comments=self._prev_comments, 4891 expression=self._parse_assignment(), 4892 ) 4893 4894 return this 4895 4896 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4897 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4898 4899 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4900 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4901 4902 def _parse_equality(self) -> t.Optional[exp.Expression]: 4903 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4904 4905 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4906 return self._parse_tokens(self._parse_range, self.COMPARISON) 4907 4908 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4909 this = this or self._parse_bitwise() 4910 negate = self._match(TokenType.NOT) 4911 4912 if self._match_set(self.RANGE_PARSERS): 4913 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4914 if not expression: 4915 return this 4916 4917 this = expression 4918 elif self._match(TokenType.ISNULL): 4919 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4920 4921 # Postgres supports ISNULL and NOTNULL for conditions. 4922 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4923 if self._match(TokenType.NOTNULL): 4924 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4925 this = self.expression(exp.Not, this=this) 4926 4927 if negate: 4928 this = self._negate_range(this) 4929 4930 if self._match(TokenType.IS): 4931 this = self._parse_is(this) 4932 4933 return this 4934 4935 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4936 if not this: 4937 return this 4938 4939 return self.expression(exp.Not, this=this) 4940 4941 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4942 index = self._index - 1 4943 negate = self._match(TokenType.NOT) 4944 4945 if self._match_text_seq("DISTINCT", "FROM"): 4946 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4947 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4948 4949 if self._match(TokenType.JSON): 4950 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4951 4952 if self._match_text_seq("WITH"): 4953 _with = True 4954 elif self._match_text_seq("WITHOUT"): 4955 _with = False 4956 else: 4957 _with = None 4958 4959 unique = self._match(TokenType.UNIQUE) 4960 self._match_text_seq("KEYS") 4961 expression: t.Optional[exp.Expression] = self.expression( 4962 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4963 ) 4964 else: 4965 expression = self._parse_primary() or self._parse_null() 4966 if not expression: 4967 self._retreat(index) 4968 return None 4969 4970 this = self.expression(exp.Is, this=this, expression=expression) 4971 return self.expression(exp.Not, this=this) if negate else this 4972 4973 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4974 unnest = self._parse_unnest(with_alias=False) 4975 if unnest: 4976 this = self.expression(exp.In, this=this, unnest=unnest) 4977 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4978 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4979 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4980 4981 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4982 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4983 else: 4984 this = self.expression(exp.In, this=this, expressions=expressions) 4985 4986 if matched_l_paren: 4987 self._match_r_paren(this) 4988 elif not self._match(TokenType.R_BRACKET, expression=this): 4989 self.raise_error("Expecting ]") 4990 else: 4991 this = self.expression(exp.In, this=this, field=self._parse_column()) 4992 4993 return this 4994 4995 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4996 low = self._parse_bitwise() 4997 self._match(TokenType.AND) 4998 high = self._parse_bitwise() 4999 return self.expression(exp.Between, this=this, low=low, high=high) 5000 5001 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5002 if not self._match(TokenType.ESCAPE): 5003 return this 5004 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5005 5006 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5007 index = self._index 5008 5009 if not self._match(TokenType.INTERVAL) and match_interval: 5010 return None 5011 5012 if self._match(TokenType.STRING, advance=False): 5013 this = self._parse_primary() 5014 else: 5015 this = self._parse_term() 5016 5017 if not this or ( 5018 isinstance(this, exp.Column) 5019 and not this.table 5020 and not this.this.quoted 5021 and this.name.upper() == "IS" 5022 ): 5023 self._retreat(index) 5024 return None 5025 5026 unit = self._parse_function() or ( 5027 not self._match(TokenType.ALIAS, advance=False) 5028 and self._parse_var(any_token=True, upper=True) 5029 ) 5030 5031 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5032 # each INTERVAL expression into this canonical form so it's easy to transpile 5033 if this and this.is_number: 5034 this = exp.Literal.string(this.to_py()) 5035 elif this and this.is_string: 5036 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5037 if parts and unit: 5038 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5039 unit = None 5040 self._retreat(self._index - 1) 5041 5042 if len(parts) == 1: 5043 this = exp.Literal.string(parts[0][0]) 5044 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5045 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5046 unit = self.expression( 5047 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5048 ) 5049 5050 interval = self.expression(exp.Interval, this=this, unit=unit) 5051 5052 index = self._index 5053 self._match(TokenType.PLUS) 5054 5055 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5056 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5057 return self.expression( 5058 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5059 ) 5060 5061 self._retreat(index) 5062 return interval 5063 5064 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5065 this = self._parse_term() 5066 5067 while True: 5068 if self._match_set(self.BITWISE): 5069 this = self.expression( 5070 self.BITWISE[self._prev.token_type], 5071 this=this, 5072 expression=self._parse_term(), 5073 ) 5074 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5075 this = self.expression( 5076 exp.DPipe, 5077 this=this, 5078 expression=self._parse_term(), 5079 safe=not self.dialect.STRICT_STRING_CONCAT, 5080 ) 5081 elif self._match(TokenType.DQMARK): 5082 this = self.expression( 5083 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5084 ) 5085 elif self._match_pair(TokenType.LT, TokenType.LT): 5086 this = self.expression( 5087 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5088 ) 5089 elif self._match_pair(TokenType.GT, TokenType.GT): 5090 this = self.expression( 5091 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5092 ) 5093 else: 5094 break 5095 5096 return this 5097 5098 def _parse_term(self) -> t.Optional[exp.Expression]: 5099 this = self._parse_factor() 5100 5101 while self._match_set(self.TERM): 5102 klass = self.TERM[self._prev.token_type] 5103 comments = self._prev_comments 5104 expression = self._parse_factor() 5105 5106 this = self.expression(klass, this=this, comments=comments, expression=expression) 5107 5108 if isinstance(this, exp.Collate): 5109 expr = this.expression 5110 5111 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5112 # fallback to Identifier / Var 5113 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5114 ident = expr.this 5115 if isinstance(ident, exp.Identifier): 5116 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5117 5118 return this 5119 5120 def _parse_factor(self) -> t.Optional[exp.Expression]: 5121 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5122 this = parse_method() 5123 5124 while self._match_set(self.FACTOR): 5125 klass = self.FACTOR[self._prev.token_type] 5126 comments = self._prev_comments 5127 expression = parse_method() 5128 5129 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5130 self._retreat(self._index - 1) 5131 return this 5132 5133 this = self.expression(klass, this=this, comments=comments, expression=expression) 5134 5135 if isinstance(this, exp.Div): 5136 this.args["typed"] = self.dialect.TYPED_DIVISION 5137 this.args["safe"] = self.dialect.SAFE_DIVISION 5138 5139 return this 5140 5141 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5142 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5143 5144 def _parse_unary(self) -> t.Optional[exp.Expression]: 5145 if self._match_set(self.UNARY_PARSERS): 5146 return self.UNARY_PARSERS[self._prev.token_type](self) 5147 return self._parse_at_time_zone(self._parse_type()) 5148 5149 def _parse_type( 5150 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5151 ) -> t.Optional[exp.Expression]: 5152 interval = parse_interval and self._parse_interval() 5153 if interval: 5154 return interval 5155 5156 index = self._index 5157 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5158 5159 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5160 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5161 if isinstance(data_type, exp.Cast): 5162 # This constructor can contain ops directly after it, for instance struct unnesting: 5163 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5164 return self._parse_column_ops(data_type) 5165 5166 if data_type: 5167 index2 = self._index 5168 this = self._parse_primary() 5169 5170 if isinstance(this, exp.Literal): 5171 literal = this.name 5172 this = self._parse_column_ops(this) 5173 5174 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5175 if parser: 5176 return parser(self, this, data_type) 5177 5178 if ( 5179 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5180 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5181 and TIME_ZONE_RE.search(literal) 5182 ): 5183 data_type = exp.DataType.build("TIMESTAMPTZ") 5184 5185 return self.expression(exp.Cast, this=this, to=data_type) 5186 5187 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5188 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5189 # 5190 # If the index difference here is greater than 1, that means the parser itself must have 5191 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5192 # 5193 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5194 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5195 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5196 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5197 # 5198 # In these cases, we don't really want to return the converted type, but instead retreat 5199 # and try to parse a Column or Identifier in the section below. 5200 if data_type.expressions and index2 - index > 1: 5201 self._retreat(index2) 5202 return self._parse_column_ops(data_type) 5203 5204 self._retreat(index) 5205 5206 if fallback_to_identifier: 5207 return self._parse_id_var() 5208 5209 this = self._parse_column() 5210 return this and self._parse_column_ops(this) 5211 5212 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5213 this = self._parse_type() 5214 if not this: 5215 return None 5216 5217 if isinstance(this, exp.Column) and not this.table: 5218 this = exp.var(this.name.upper()) 5219 5220 return self.expression( 5221 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5222 ) 5223 5224 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5225 type_name = identifier.name 5226 5227 while self._match(TokenType.DOT): 5228 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5229 5230 return exp.DataType.build(type_name, udt=True) 5231 5232 def _parse_types( 5233 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5234 ) -> t.Optional[exp.Expression]: 5235 index = self._index 5236 5237 this: t.Optional[exp.Expression] = None 5238 prefix = self._match_text_seq("SYSUDTLIB", ".") 5239 5240 if not self._match_set(self.TYPE_TOKENS): 5241 identifier = allow_identifiers and self._parse_id_var( 5242 any_token=False, tokens=(TokenType.VAR,) 5243 ) 5244 if isinstance(identifier, exp.Identifier): 5245 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5246 5247 if len(tokens) != 1: 5248 self.raise_error("Unexpected identifier", self._prev) 5249 5250 if tokens[0].token_type in self.TYPE_TOKENS: 5251 self._prev = tokens[0] 5252 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5253 this = self._parse_user_defined_type(identifier) 5254 else: 5255 self._retreat(self._index - 1) 5256 return None 5257 else: 5258 return None 5259 5260 type_token = self._prev.token_type 5261 5262 if type_token == TokenType.PSEUDO_TYPE: 5263 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5264 5265 if type_token == TokenType.OBJECT_IDENTIFIER: 5266 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5267 5268 # https://materialize.com/docs/sql/types/map/ 5269 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5270 key_type = self._parse_types( 5271 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5272 ) 5273 if not self._match(TokenType.FARROW): 5274 self._retreat(index) 5275 return None 5276 5277 value_type = self._parse_types( 5278 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5279 ) 5280 if not self._match(TokenType.R_BRACKET): 5281 self._retreat(index) 5282 return None 5283 5284 return exp.DataType( 5285 this=exp.DataType.Type.MAP, 5286 expressions=[key_type, value_type], 5287 nested=True, 5288 prefix=prefix, 5289 ) 5290 5291 nested = type_token in self.NESTED_TYPE_TOKENS 5292 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5293 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5294 expressions = None 5295 maybe_func = False 5296 5297 if self._match(TokenType.L_PAREN): 5298 if is_struct: 5299 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5300 elif nested: 5301 expressions = self._parse_csv( 5302 lambda: self._parse_types( 5303 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5304 ) 5305 ) 5306 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5307 this = expressions[0] 5308 this.set("nullable", True) 5309 self._match_r_paren() 5310 return this 5311 elif type_token in self.ENUM_TYPE_TOKENS: 5312 expressions = self._parse_csv(self._parse_equality) 5313 elif is_aggregate: 5314 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5315 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5316 ) 5317 if not func_or_ident: 5318 return None 5319 expressions = [func_or_ident] 5320 if self._match(TokenType.COMMA): 5321 expressions.extend( 5322 self._parse_csv( 5323 lambda: self._parse_types( 5324 check_func=check_func, 5325 schema=schema, 5326 allow_identifiers=allow_identifiers, 5327 ) 5328 ) 5329 ) 5330 else: 5331 expressions = self._parse_csv(self._parse_type_size) 5332 5333 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5334 if type_token == TokenType.VECTOR and len(expressions) == 2: 5335 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5336 5337 if not expressions or not self._match(TokenType.R_PAREN): 5338 self._retreat(index) 5339 return None 5340 5341 maybe_func = True 5342 5343 values: t.Optional[t.List[exp.Expression]] = None 5344 5345 if nested and self._match(TokenType.LT): 5346 if is_struct: 5347 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5348 else: 5349 expressions = self._parse_csv( 5350 lambda: self._parse_types( 5351 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5352 ) 5353 ) 5354 5355 if not self._match(TokenType.GT): 5356 self.raise_error("Expecting >") 5357 5358 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5359 values = self._parse_csv(self._parse_assignment) 5360 if not values and is_struct: 5361 values = None 5362 self._retreat(self._index - 1) 5363 else: 5364 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5365 5366 if type_token in self.TIMESTAMPS: 5367 if self._match_text_seq("WITH", "TIME", "ZONE"): 5368 maybe_func = False 5369 tz_type = ( 5370 exp.DataType.Type.TIMETZ 5371 if type_token in self.TIMES 5372 else exp.DataType.Type.TIMESTAMPTZ 5373 ) 5374 this = exp.DataType(this=tz_type, expressions=expressions) 5375 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5376 maybe_func = False 5377 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5378 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5379 maybe_func = False 5380 elif type_token == TokenType.INTERVAL: 5381 unit = self._parse_var(upper=True) 5382 if unit: 5383 if self._match_text_seq("TO"): 5384 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5385 5386 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5387 else: 5388 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5389 elif type_token == TokenType.VOID: 5390 this = exp.DataType(this=exp.DataType.Type.NULL) 5391 5392 if maybe_func and check_func: 5393 index2 = self._index 5394 peek = self._parse_string() 5395 5396 if not peek: 5397 self._retreat(index) 5398 return None 5399 5400 self._retreat(index2) 5401 5402 if not this: 5403 if self._match_text_seq("UNSIGNED"): 5404 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5405 if not unsigned_type_token: 5406 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5407 5408 type_token = unsigned_type_token or type_token 5409 5410 this = exp.DataType( 5411 this=exp.DataType.Type[type_token.value], 5412 expressions=expressions, 5413 nested=nested, 5414 prefix=prefix, 5415 ) 5416 5417 # Empty arrays/structs are allowed 5418 if values is not None: 5419 cls = exp.Struct if is_struct else exp.Array 5420 this = exp.cast(cls(expressions=values), this, copy=False) 5421 5422 elif expressions: 5423 this.set("expressions", expressions) 5424 5425 # https://materialize.com/docs/sql/types/list/#type-name 5426 while self._match(TokenType.LIST): 5427 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5428 5429 index = self._index 5430 5431 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5432 matched_array = self._match(TokenType.ARRAY) 5433 5434 while self._curr: 5435 datatype_token = self._prev.token_type 5436 matched_l_bracket = self._match(TokenType.L_BRACKET) 5437 5438 if (not matched_l_bracket and not matched_array) or ( 5439 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5440 ): 5441 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5442 # not to be confused with the fixed size array parsing 5443 break 5444 5445 matched_array = False 5446 values = self._parse_csv(self._parse_assignment) or None 5447 if ( 5448 values 5449 and not schema 5450 and ( 5451 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5452 ) 5453 ): 5454 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5455 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5456 self._retreat(index) 5457 break 5458 5459 this = exp.DataType( 5460 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5461 ) 5462 self._match(TokenType.R_BRACKET) 5463 5464 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5465 converter = self.TYPE_CONVERTERS.get(this.this) 5466 if converter: 5467 this = converter(t.cast(exp.DataType, this)) 5468 5469 return this 5470 5471 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5472 index = self._index 5473 5474 if ( 5475 self._curr 5476 and self._next 5477 and self._curr.token_type in self.TYPE_TOKENS 5478 and self._next.token_type in self.TYPE_TOKENS 5479 ): 5480 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5481 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5482 this = self._parse_id_var() 5483 else: 5484 this = ( 5485 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5486 or self._parse_id_var() 5487 ) 5488 5489 self._match(TokenType.COLON) 5490 5491 if ( 5492 type_required 5493 and not isinstance(this, exp.DataType) 5494 and not self._match_set(self.TYPE_TOKENS, advance=False) 5495 ): 5496 self._retreat(index) 5497 return self._parse_types() 5498 5499 return self._parse_column_def(this) 5500 5501 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5502 if not self._match_text_seq("AT", "TIME", "ZONE"): 5503 return this 5504 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5505 5506 def _parse_column(self) -> t.Optional[exp.Expression]: 5507 this = self._parse_column_reference() 5508 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5509 5510 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5511 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5512 5513 return column 5514 5515 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5516 this = self._parse_field() 5517 if ( 5518 not this 5519 and self._match(TokenType.VALUES, advance=False) 5520 and self.VALUES_FOLLOWED_BY_PAREN 5521 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5522 ): 5523 this = self._parse_id_var() 5524 5525 if isinstance(this, exp.Identifier): 5526 # We bubble up comments from the Identifier to the Column 5527 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5528 5529 return this 5530 5531 def _parse_colon_as_variant_extract( 5532 self, this: t.Optional[exp.Expression] 5533 ) -> t.Optional[exp.Expression]: 5534 casts = [] 5535 json_path = [] 5536 escape = None 5537 5538 while self._match(TokenType.COLON): 5539 start_index = self._index 5540 5541 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5542 path = self._parse_column_ops( 5543 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5544 ) 5545 5546 # The cast :: operator has a lower precedence than the extraction operator :, so 5547 # we rearrange the AST appropriately to avoid casting the JSON path 5548 while isinstance(path, exp.Cast): 5549 casts.append(path.to) 5550 path = path.this 5551 5552 if casts: 5553 dcolon_offset = next( 5554 i 5555 for i, t in enumerate(self._tokens[start_index:]) 5556 if t.token_type == TokenType.DCOLON 5557 ) 5558 end_token = self._tokens[start_index + dcolon_offset - 1] 5559 else: 5560 end_token = self._prev 5561 5562 if path: 5563 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5564 # it'll roundtrip to a string literal in GET_PATH 5565 if isinstance(path, exp.Identifier) and path.quoted: 5566 escape = True 5567 5568 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5569 5570 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5571 # Databricks transforms it back to the colon/dot notation 5572 if json_path: 5573 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5574 5575 if json_path_expr: 5576 json_path_expr.set("escape", escape) 5577 5578 this = self.expression( 5579 exp.JSONExtract, 5580 this=this, 5581 expression=json_path_expr, 5582 variant_extract=True, 5583 ) 5584 5585 while casts: 5586 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5587 5588 return this 5589 5590 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5591 return self._parse_types() 5592 5593 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5594 this = self._parse_bracket(this) 5595 5596 while self._match_set(self.COLUMN_OPERATORS): 5597 op_token = self._prev.token_type 5598 op = self.COLUMN_OPERATORS.get(op_token) 5599 5600 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5601 field = self._parse_dcolon() 5602 if not field: 5603 self.raise_error("Expected type") 5604 elif op and self._curr: 5605 field = self._parse_column_reference() or self._parse_bracket() 5606 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5607 field = self._parse_column_ops(field) 5608 else: 5609 field = self._parse_field(any_token=True, anonymous_func=True) 5610 5611 # Function calls can be qualified, e.g., x.y.FOO() 5612 # This converts the final AST to a series of Dots leading to the function call 5613 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5614 if isinstance(field, (exp.Func, exp.Window)) and this: 5615 this = this.transform( 5616 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5617 ) 5618 5619 if op: 5620 this = op(self, this, field) 5621 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5622 this = self.expression( 5623 exp.Column, 5624 comments=this.comments, 5625 this=field, 5626 table=this.this, 5627 db=this.args.get("table"), 5628 catalog=this.args.get("db"), 5629 ) 5630 elif isinstance(field, exp.Window): 5631 # Move the exp.Dot's to the window's function 5632 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5633 field.set("this", window_func) 5634 this = field 5635 else: 5636 this = self.expression(exp.Dot, this=this, expression=field) 5637 5638 if field and field.comments: 5639 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5640 5641 this = self._parse_bracket(this) 5642 5643 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5644 5645 def _parse_paren(self) -> t.Optional[exp.Expression]: 5646 if not self._match(TokenType.L_PAREN): 5647 return None 5648 5649 comments = self._prev_comments 5650 query = self._parse_select() 5651 5652 if query: 5653 expressions = [query] 5654 else: 5655 expressions = self._parse_expressions() 5656 5657 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5658 5659 if not this and self._match(TokenType.R_PAREN, advance=False): 5660 this = self.expression(exp.Tuple) 5661 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5662 this = self._parse_subquery(this=this, parse_alias=False) 5663 elif isinstance(this, exp.Subquery): 5664 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5665 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5666 this = self.expression(exp.Tuple, expressions=expressions) 5667 else: 5668 this = self.expression(exp.Paren, this=this) 5669 5670 if this: 5671 this.add_comments(comments) 5672 5673 self._match_r_paren(expression=this) 5674 return this 5675 5676 def _parse_primary(self) -> t.Optional[exp.Expression]: 5677 if self._match_set(self.PRIMARY_PARSERS): 5678 token_type = self._prev.token_type 5679 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5680 5681 if token_type == TokenType.STRING: 5682 expressions = [primary] 5683 while self._match(TokenType.STRING): 5684 expressions.append(exp.Literal.string(self._prev.text)) 5685 5686 if len(expressions) > 1: 5687 return self.expression(exp.Concat, expressions=expressions) 5688 5689 return primary 5690 5691 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5692 return exp.Literal.number(f"0.{self._prev.text}") 5693 5694 return self._parse_paren() 5695 5696 def _parse_field( 5697 self, 5698 any_token: bool = False, 5699 tokens: t.Optional[t.Collection[TokenType]] = None, 5700 anonymous_func: bool = False, 5701 ) -> t.Optional[exp.Expression]: 5702 if anonymous_func: 5703 field = ( 5704 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5705 or self._parse_primary() 5706 ) 5707 else: 5708 field = self._parse_primary() or self._parse_function( 5709 anonymous=anonymous_func, any_token=any_token 5710 ) 5711 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5712 5713 def _parse_function( 5714 self, 5715 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5716 anonymous: bool = False, 5717 optional_parens: bool = True, 5718 any_token: bool = False, 5719 ) -> t.Optional[exp.Expression]: 5720 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5721 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5722 fn_syntax = False 5723 if ( 5724 self._match(TokenType.L_BRACE, advance=False) 5725 and self._next 5726 and self._next.text.upper() == "FN" 5727 ): 5728 self._advance(2) 5729 fn_syntax = True 5730 5731 func = self._parse_function_call( 5732 functions=functions, 5733 anonymous=anonymous, 5734 optional_parens=optional_parens, 5735 any_token=any_token, 5736 ) 5737 5738 if fn_syntax: 5739 self._match(TokenType.R_BRACE) 5740 5741 return func 5742 5743 def _parse_function_call( 5744 self, 5745 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5746 anonymous: bool = False, 5747 optional_parens: bool = True, 5748 any_token: bool = False, 5749 ) -> t.Optional[exp.Expression]: 5750 if not self._curr: 5751 return None 5752 5753 comments = self._curr.comments 5754 token = self._curr 5755 token_type = self._curr.token_type 5756 this = self._curr.text 5757 upper = this.upper() 5758 5759 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5760 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5761 self._advance() 5762 return self._parse_window(parser(self)) 5763 5764 if not self._next or self._next.token_type != TokenType.L_PAREN: 5765 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5766 self._advance() 5767 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5768 5769 return None 5770 5771 if any_token: 5772 if token_type in self.RESERVED_TOKENS: 5773 return None 5774 elif token_type not in self.FUNC_TOKENS: 5775 return None 5776 5777 self._advance(2) 5778 5779 parser = self.FUNCTION_PARSERS.get(upper) 5780 if parser and not anonymous: 5781 this = parser(self) 5782 else: 5783 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5784 5785 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5786 this = self.expression( 5787 subquery_predicate, comments=comments, this=self._parse_select() 5788 ) 5789 self._match_r_paren() 5790 return this 5791 5792 if functions is None: 5793 functions = self.FUNCTIONS 5794 5795 function = functions.get(upper) 5796 known_function = function and not anonymous 5797 5798 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5799 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5800 5801 post_func_comments = self._curr and self._curr.comments 5802 if known_function and post_func_comments: 5803 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5804 # call we'll construct it as exp.Anonymous, even if it's "known" 5805 if any( 5806 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5807 for comment in post_func_comments 5808 ): 5809 known_function = False 5810 5811 if alias and known_function: 5812 args = self._kv_to_prop_eq(args) 5813 5814 if known_function: 5815 func_builder = t.cast(t.Callable, function) 5816 5817 if "dialect" in func_builder.__code__.co_varnames: 5818 func = func_builder(args, dialect=self.dialect) 5819 else: 5820 func = func_builder(args) 5821 5822 func = self.validate_expression(func, args) 5823 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5824 func.meta["name"] = this 5825 5826 this = func 5827 else: 5828 if token_type == TokenType.IDENTIFIER: 5829 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5830 5831 this = self.expression(exp.Anonymous, this=this, expressions=args) 5832 this = this.update_positions(token) 5833 5834 if isinstance(this, exp.Expression): 5835 this.add_comments(comments) 5836 5837 self._match_r_paren(this) 5838 return self._parse_window(this) 5839 5840 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5841 return expression 5842 5843 def _kv_to_prop_eq( 5844 self, expressions: t.List[exp.Expression], parse_map: bool = False 5845 ) -> t.List[exp.Expression]: 5846 transformed = [] 5847 5848 for index, e in enumerate(expressions): 5849 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5850 if isinstance(e, exp.Alias): 5851 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5852 5853 if not isinstance(e, exp.PropertyEQ): 5854 e = self.expression( 5855 exp.PropertyEQ, 5856 this=e.this if parse_map else exp.to_identifier(e.this.name), 5857 expression=e.expression, 5858 ) 5859 5860 if isinstance(e.this, exp.Column): 5861 e.this.replace(e.this.this) 5862 else: 5863 e = self._to_prop_eq(e, index) 5864 5865 transformed.append(e) 5866 5867 return transformed 5868 5869 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5870 return self._parse_statement() 5871 5872 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5873 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5874 5875 def _parse_user_defined_function( 5876 self, kind: t.Optional[TokenType] = None 5877 ) -> t.Optional[exp.Expression]: 5878 this = self._parse_table_parts(schema=True) 5879 5880 if not self._match(TokenType.L_PAREN): 5881 return this 5882 5883 expressions = self._parse_csv(self._parse_function_parameter) 5884 self._match_r_paren() 5885 return self.expression( 5886 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5887 ) 5888 5889 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5890 literal = self._parse_primary() 5891 if literal: 5892 return self.expression(exp.Introducer, this=token.text, expression=literal) 5893 5894 return self._identifier_expression(token) 5895 5896 def _parse_session_parameter(self) -> exp.SessionParameter: 5897 kind = None 5898 this = self._parse_id_var() or self._parse_primary() 5899 5900 if this and self._match(TokenType.DOT): 5901 kind = this.name 5902 this = self._parse_var() or self._parse_primary() 5903 5904 return self.expression(exp.SessionParameter, this=this, kind=kind) 5905 5906 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5907 return self._parse_id_var() 5908 5909 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5910 index = self._index 5911 5912 if self._match(TokenType.L_PAREN): 5913 expressions = t.cast( 5914 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5915 ) 5916 5917 if not self._match(TokenType.R_PAREN): 5918 self._retreat(index) 5919 else: 5920 expressions = [self._parse_lambda_arg()] 5921 5922 if self._match_set(self.LAMBDAS): 5923 return self.LAMBDAS[self._prev.token_type](self, expressions) 5924 5925 self._retreat(index) 5926 5927 this: t.Optional[exp.Expression] 5928 5929 if self._match(TokenType.DISTINCT): 5930 this = self.expression( 5931 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5932 ) 5933 else: 5934 this = self._parse_select_or_expression(alias=alias) 5935 5936 return self._parse_limit( 5937 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5938 ) 5939 5940 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5941 index = self._index 5942 if not self._match(TokenType.L_PAREN): 5943 return this 5944 5945 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5946 # expr can be of both types 5947 if self._match_set(self.SELECT_START_TOKENS): 5948 self._retreat(index) 5949 return this 5950 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5951 self._match_r_paren() 5952 return self.expression(exp.Schema, this=this, expressions=args) 5953 5954 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5955 return self._parse_column_def(self._parse_field(any_token=True)) 5956 5957 def _parse_column_def( 5958 self, this: t.Optional[exp.Expression], computed_column: bool = True 5959 ) -> t.Optional[exp.Expression]: 5960 # column defs are not really columns, they're identifiers 5961 if isinstance(this, exp.Column): 5962 this = this.this 5963 5964 if not computed_column: 5965 self._match(TokenType.ALIAS) 5966 5967 kind = self._parse_types(schema=True) 5968 5969 if self._match_text_seq("FOR", "ORDINALITY"): 5970 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5971 5972 constraints: t.List[exp.Expression] = [] 5973 5974 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5975 ("ALIAS", "MATERIALIZED") 5976 ): 5977 persisted = self._prev.text.upper() == "MATERIALIZED" 5978 constraint_kind = exp.ComputedColumnConstraint( 5979 this=self._parse_assignment(), 5980 persisted=persisted or self._match_text_seq("PERSISTED"), 5981 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5982 ) 5983 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5984 elif ( 5985 kind 5986 and self._match(TokenType.ALIAS, advance=False) 5987 and ( 5988 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5989 or (self._next and self._next.token_type == TokenType.L_PAREN) 5990 ) 5991 ): 5992 self._advance() 5993 constraints.append( 5994 self.expression( 5995 exp.ColumnConstraint, 5996 kind=exp.ComputedColumnConstraint( 5997 this=self._parse_disjunction(), 5998 persisted=self._match_texts(("STORED", "VIRTUAL")) 5999 and self._prev.text.upper() == "STORED", 6000 ), 6001 ) 6002 ) 6003 6004 while True: 6005 constraint = self._parse_column_constraint() 6006 if not constraint: 6007 break 6008 constraints.append(constraint) 6009 6010 if not kind and not constraints: 6011 return this 6012 6013 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6014 6015 def _parse_auto_increment( 6016 self, 6017 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6018 start = None 6019 increment = None 6020 order = None 6021 6022 if self._match(TokenType.L_PAREN, advance=False): 6023 args = self._parse_wrapped_csv(self._parse_bitwise) 6024 start = seq_get(args, 0) 6025 increment = seq_get(args, 1) 6026 elif self._match_text_seq("START"): 6027 start = self._parse_bitwise() 6028 self._match_text_seq("INCREMENT") 6029 increment = self._parse_bitwise() 6030 if self._match_text_seq("ORDER"): 6031 order = True 6032 elif self._match_text_seq("NOORDER"): 6033 order = False 6034 6035 if start and increment: 6036 return exp.GeneratedAsIdentityColumnConstraint( 6037 start=start, increment=increment, this=False, order=order 6038 ) 6039 6040 return exp.AutoIncrementColumnConstraint() 6041 6042 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6043 if not self._match_text_seq("REFRESH"): 6044 self._retreat(self._index - 1) 6045 return None 6046 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6047 6048 def _parse_compress(self) -> exp.CompressColumnConstraint: 6049 if self._match(TokenType.L_PAREN, advance=False): 6050 return self.expression( 6051 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6052 ) 6053 6054 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6055 6056 def _parse_generated_as_identity( 6057 self, 6058 ) -> ( 6059 exp.GeneratedAsIdentityColumnConstraint 6060 | exp.ComputedColumnConstraint 6061 | exp.GeneratedAsRowColumnConstraint 6062 ): 6063 if self._match_text_seq("BY", "DEFAULT"): 6064 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6065 this = self.expression( 6066 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6067 ) 6068 else: 6069 self._match_text_seq("ALWAYS") 6070 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6071 6072 self._match(TokenType.ALIAS) 6073 6074 if self._match_text_seq("ROW"): 6075 start = self._match_text_seq("START") 6076 if not start: 6077 self._match(TokenType.END) 6078 hidden = self._match_text_seq("HIDDEN") 6079 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6080 6081 identity = self._match_text_seq("IDENTITY") 6082 6083 if self._match(TokenType.L_PAREN): 6084 if self._match(TokenType.START_WITH): 6085 this.set("start", self._parse_bitwise()) 6086 if self._match_text_seq("INCREMENT", "BY"): 6087 this.set("increment", self._parse_bitwise()) 6088 if self._match_text_seq("MINVALUE"): 6089 this.set("minvalue", self._parse_bitwise()) 6090 if self._match_text_seq("MAXVALUE"): 6091 this.set("maxvalue", self._parse_bitwise()) 6092 6093 if self._match_text_seq("CYCLE"): 6094 this.set("cycle", True) 6095 elif self._match_text_seq("NO", "CYCLE"): 6096 this.set("cycle", False) 6097 6098 if not identity: 6099 this.set("expression", self._parse_range()) 6100 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6101 args = self._parse_csv(self._parse_bitwise) 6102 this.set("start", seq_get(args, 0)) 6103 this.set("increment", seq_get(args, 1)) 6104 6105 self._match_r_paren() 6106 6107 return this 6108 6109 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6110 self._match_text_seq("LENGTH") 6111 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6112 6113 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6114 if self._match_text_seq("NULL"): 6115 return self.expression(exp.NotNullColumnConstraint) 6116 if self._match_text_seq("CASESPECIFIC"): 6117 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6118 if self._match_text_seq("FOR", "REPLICATION"): 6119 return self.expression(exp.NotForReplicationColumnConstraint) 6120 6121 # Unconsume the `NOT` token 6122 self._retreat(self._index - 1) 6123 return None 6124 6125 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6126 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6127 6128 procedure_option_follows = ( 6129 self._match(TokenType.WITH, advance=False) 6130 and self._next 6131 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6132 ) 6133 6134 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6135 return self.expression( 6136 exp.ColumnConstraint, 6137 this=this, 6138 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6139 ) 6140 6141 return this 6142 6143 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6144 if not self._match(TokenType.CONSTRAINT): 6145 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6146 6147 return self.expression( 6148 exp.Constraint, 6149 this=self._parse_id_var(), 6150 expressions=self._parse_unnamed_constraints(), 6151 ) 6152 6153 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6154 constraints = [] 6155 while True: 6156 constraint = self._parse_unnamed_constraint() or self._parse_function() 6157 if not constraint: 6158 break 6159 constraints.append(constraint) 6160 6161 return constraints 6162 6163 def _parse_unnamed_constraint( 6164 self, constraints: t.Optional[t.Collection[str]] = None 6165 ) -> t.Optional[exp.Expression]: 6166 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6167 constraints or self.CONSTRAINT_PARSERS 6168 ): 6169 return None 6170 6171 constraint = self._prev.text.upper() 6172 if constraint not in self.CONSTRAINT_PARSERS: 6173 self.raise_error(f"No parser found for schema constraint {constraint}.") 6174 6175 return self.CONSTRAINT_PARSERS[constraint](self) 6176 6177 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6178 return self._parse_id_var(any_token=False) 6179 6180 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6181 self._match_text_seq("KEY") 6182 return self.expression( 6183 exp.UniqueColumnConstraint, 6184 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6185 this=self._parse_schema(self._parse_unique_key()), 6186 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6187 on_conflict=self._parse_on_conflict(), 6188 options=self._parse_key_constraint_options(), 6189 ) 6190 6191 def _parse_key_constraint_options(self) -> t.List[str]: 6192 options = [] 6193 while True: 6194 if not self._curr: 6195 break 6196 6197 if self._match(TokenType.ON): 6198 action = None 6199 on = self._advance_any() and self._prev.text 6200 6201 if self._match_text_seq("NO", "ACTION"): 6202 action = "NO ACTION" 6203 elif self._match_text_seq("CASCADE"): 6204 action = "CASCADE" 6205 elif self._match_text_seq("RESTRICT"): 6206 action = "RESTRICT" 6207 elif self._match_pair(TokenType.SET, TokenType.NULL): 6208 action = "SET NULL" 6209 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6210 action = "SET DEFAULT" 6211 else: 6212 self.raise_error("Invalid key constraint") 6213 6214 options.append(f"ON {on} {action}") 6215 else: 6216 var = self._parse_var_from_options( 6217 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6218 ) 6219 if not var: 6220 break 6221 options.append(var.name) 6222 6223 return options 6224 6225 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6226 if match and not self._match(TokenType.REFERENCES): 6227 return None 6228 6229 expressions = None 6230 this = self._parse_table(schema=True) 6231 options = self._parse_key_constraint_options() 6232 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6233 6234 def _parse_foreign_key(self) -> exp.ForeignKey: 6235 expressions = ( 6236 self._parse_wrapped_id_vars() 6237 if not self._match(TokenType.REFERENCES, advance=False) 6238 else None 6239 ) 6240 reference = self._parse_references() 6241 on_options = {} 6242 6243 while self._match(TokenType.ON): 6244 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6245 self.raise_error("Expected DELETE or UPDATE") 6246 6247 kind = self._prev.text.lower() 6248 6249 if self._match_text_seq("NO", "ACTION"): 6250 action = "NO ACTION" 6251 elif self._match(TokenType.SET): 6252 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6253 action = "SET " + self._prev.text.upper() 6254 else: 6255 self._advance() 6256 action = self._prev.text.upper() 6257 6258 on_options[kind] = action 6259 6260 return self.expression( 6261 exp.ForeignKey, 6262 expressions=expressions, 6263 reference=reference, 6264 options=self._parse_key_constraint_options(), 6265 **on_options, # type: ignore 6266 ) 6267 6268 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6269 return self._parse_ordered() or self._parse_field() 6270 6271 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6272 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6273 self._retreat(self._index - 1) 6274 return None 6275 6276 id_vars = self._parse_wrapped_id_vars() 6277 return self.expression( 6278 exp.PeriodForSystemTimeConstraint, 6279 this=seq_get(id_vars, 0), 6280 expression=seq_get(id_vars, 1), 6281 ) 6282 6283 def _parse_primary_key( 6284 self, wrapped_optional: bool = False, in_props: bool = False 6285 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6286 desc = ( 6287 self._match_set((TokenType.ASC, TokenType.DESC)) 6288 and self._prev.token_type == TokenType.DESC 6289 ) 6290 6291 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6292 return self.expression( 6293 exp.PrimaryKeyColumnConstraint, 6294 desc=desc, 6295 options=self._parse_key_constraint_options(), 6296 ) 6297 6298 expressions = self._parse_wrapped_csv( 6299 self._parse_primary_key_part, optional=wrapped_optional 6300 ) 6301 options = self._parse_key_constraint_options() 6302 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6303 6304 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6305 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6306 6307 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6308 """ 6309 Parses a datetime column in ODBC format. We parse the column into the corresponding 6310 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6311 same as we did for `DATE('yyyy-mm-dd')`. 6312 6313 Reference: 6314 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6315 """ 6316 self._match(TokenType.VAR) 6317 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6318 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6319 if not self._match(TokenType.R_BRACE): 6320 self.raise_error("Expected }") 6321 return expression 6322 6323 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6324 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6325 return this 6326 6327 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6328 map_token = seq_get(self._tokens, self._index - 2) 6329 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6330 else: 6331 parse_map = False 6332 6333 bracket_kind = self._prev.token_type 6334 if ( 6335 bracket_kind == TokenType.L_BRACE 6336 and self._curr 6337 and self._curr.token_type == TokenType.VAR 6338 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6339 ): 6340 return self._parse_odbc_datetime_literal() 6341 6342 expressions = self._parse_csv( 6343 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6344 ) 6345 6346 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6347 self.raise_error("Expected ]") 6348 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6349 self.raise_error("Expected }") 6350 6351 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6352 if bracket_kind == TokenType.L_BRACE: 6353 this = self.expression( 6354 exp.Struct, 6355 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6356 ) 6357 elif not this: 6358 this = build_array_constructor( 6359 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6360 ) 6361 else: 6362 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6363 if constructor_type: 6364 return build_array_constructor( 6365 constructor_type, 6366 args=expressions, 6367 bracket_kind=bracket_kind, 6368 dialect=self.dialect, 6369 ) 6370 6371 expressions = apply_index_offset( 6372 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6373 ) 6374 this = self.expression( 6375 exp.Bracket, 6376 this=this, 6377 expressions=expressions, 6378 comments=this.pop_comments(), 6379 ) 6380 6381 self._add_comments(this) 6382 return self._parse_bracket(this) 6383 6384 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6385 if self._match(TokenType.COLON): 6386 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6387 return this 6388 6389 def _parse_case(self) -> t.Optional[exp.Expression]: 6390 ifs = [] 6391 default = None 6392 6393 comments = self._prev_comments 6394 expression = self._parse_assignment() 6395 6396 while self._match(TokenType.WHEN): 6397 this = self._parse_assignment() 6398 self._match(TokenType.THEN) 6399 then = self._parse_assignment() 6400 ifs.append(self.expression(exp.If, this=this, true=then)) 6401 6402 if self._match(TokenType.ELSE): 6403 default = self._parse_assignment() 6404 6405 if not self._match(TokenType.END): 6406 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6407 default = exp.column("interval") 6408 else: 6409 self.raise_error("Expected END after CASE", self._prev) 6410 6411 return self.expression( 6412 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6413 ) 6414 6415 def _parse_if(self) -> t.Optional[exp.Expression]: 6416 if self._match(TokenType.L_PAREN): 6417 args = self._parse_csv( 6418 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6419 ) 6420 this = self.validate_expression(exp.If.from_arg_list(args), args) 6421 self._match_r_paren() 6422 else: 6423 index = self._index - 1 6424 6425 if self.NO_PAREN_IF_COMMANDS and index == 0: 6426 return self._parse_as_command(self._prev) 6427 6428 condition = self._parse_assignment() 6429 6430 if not condition: 6431 self._retreat(index) 6432 return None 6433 6434 self._match(TokenType.THEN) 6435 true = self._parse_assignment() 6436 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6437 self._match(TokenType.END) 6438 this = self.expression(exp.If, this=condition, true=true, false=false) 6439 6440 return this 6441 6442 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6443 if not self._match_text_seq("VALUE", "FOR"): 6444 self._retreat(self._index - 1) 6445 return None 6446 6447 return self.expression( 6448 exp.NextValueFor, 6449 this=self._parse_column(), 6450 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6451 ) 6452 6453 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6454 this = self._parse_function() or self._parse_var_or_string(upper=True) 6455 6456 if self._match(TokenType.FROM): 6457 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6458 6459 if not self._match(TokenType.COMMA): 6460 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6461 6462 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6463 6464 def _parse_gap_fill(self) -> exp.GapFill: 6465 self._match(TokenType.TABLE) 6466 this = self._parse_table() 6467 6468 self._match(TokenType.COMMA) 6469 args = [this, *self._parse_csv(self._parse_lambda)] 6470 6471 gap_fill = exp.GapFill.from_arg_list(args) 6472 return self.validate_expression(gap_fill, args) 6473 6474 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6475 this = self._parse_assignment() 6476 6477 if not self._match(TokenType.ALIAS): 6478 if self._match(TokenType.COMMA): 6479 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6480 6481 self.raise_error("Expected AS after CAST") 6482 6483 fmt = None 6484 to = self._parse_types() 6485 6486 default = self._match(TokenType.DEFAULT) 6487 if default: 6488 default = self._parse_bitwise() 6489 self._match_text_seq("ON", "CONVERSION", "ERROR") 6490 6491 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6492 fmt_string = self._parse_string() 6493 fmt = self._parse_at_time_zone(fmt_string) 6494 6495 if not to: 6496 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6497 if to.this in exp.DataType.TEMPORAL_TYPES: 6498 this = self.expression( 6499 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6500 this=this, 6501 format=exp.Literal.string( 6502 format_time( 6503 fmt_string.this if fmt_string else "", 6504 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6505 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6506 ) 6507 ), 6508 safe=safe, 6509 ) 6510 6511 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6512 this.set("zone", fmt.args["zone"]) 6513 return this 6514 elif not to: 6515 self.raise_error("Expected TYPE after CAST") 6516 elif isinstance(to, exp.Identifier): 6517 to = exp.DataType.build(to.name, udt=True) 6518 elif to.this == exp.DataType.Type.CHAR: 6519 if self._match(TokenType.CHARACTER_SET): 6520 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6521 6522 return self.expression( 6523 exp.Cast if strict else exp.TryCast, 6524 this=this, 6525 to=to, 6526 format=fmt, 6527 safe=safe, 6528 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6529 default=default, 6530 ) 6531 6532 def _parse_string_agg(self) -> exp.GroupConcat: 6533 if self._match(TokenType.DISTINCT): 6534 args: t.List[t.Optional[exp.Expression]] = [ 6535 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6536 ] 6537 if self._match(TokenType.COMMA): 6538 args.extend(self._parse_csv(self._parse_assignment)) 6539 else: 6540 args = self._parse_csv(self._parse_assignment) # type: ignore 6541 6542 if self._match_text_seq("ON", "OVERFLOW"): 6543 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6544 if self._match_text_seq("ERROR"): 6545 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6546 else: 6547 self._match_text_seq("TRUNCATE") 6548 on_overflow = self.expression( 6549 exp.OverflowTruncateBehavior, 6550 this=self._parse_string(), 6551 with_count=( 6552 self._match_text_seq("WITH", "COUNT") 6553 or not self._match_text_seq("WITHOUT", "COUNT") 6554 ), 6555 ) 6556 else: 6557 on_overflow = None 6558 6559 index = self._index 6560 if not self._match(TokenType.R_PAREN) and args: 6561 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6562 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6563 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6564 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6565 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6566 6567 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6568 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6569 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6570 if not self._match_text_seq("WITHIN", "GROUP"): 6571 self._retreat(index) 6572 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6573 6574 # The corresponding match_r_paren will be called in parse_function (caller) 6575 self._match_l_paren() 6576 6577 return self.expression( 6578 exp.GroupConcat, 6579 this=self._parse_order(this=seq_get(args, 0)), 6580 separator=seq_get(args, 1), 6581 on_overflow=on_overflow, 6582 ) 6583 6584 def _parse_convert( 6585 self, strict: bool, safe: t.Optional[bool] = None 6586 ) -> t.Optional[exp.Expression]: 6587 this = self._parse_bitwise() 6588 6589 if self._match(TokenType.USING): 6590 to: t.Optional[exp.Expression] = self.expression( 6591 exp.CharacterSet, this=self._parse_var() 6592 ) 6593 elif self._match(TokenType.COMMA): 6594 to = self._parse_types() 6595 else: 6596 to = None 6597 6598 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6599 6600 def _parse_xml_table(self) -> exp.XMLTable: 6601 namespaces = None 6602 passing = None 6603 columns = None 6604 6605 if self._match_text_seq("XMLNAMESPACES", "("): 6606 namespaces = self._parse_xml_namespace() 6607 self._match_text_seq(")", ",") 6608 6609 this = self._parse_string() 6610 6611 if self._match_text_seq("PASSING"): 6612 # The BY VALUE keywords are optional and are provided for semantic clarity 6613 self._match_text_seq("BY", "VALUE") 6614 passing = self._parse_csv(self._parse_column) 6615 6616 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6617 6618 if self._match_text_seq("COLUMNS"): 6619 columns = self._parse_csv(self._parse_field_def) 6620 6621 return self.expression( 6622 exp.XMLTable, 6623 this=this, 6624 namespaces=namespaces, 6625 passing=passing, 6626 columns=columns, 6627 by_ref=by_ref, 6628 ) 6629 6630 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6631 namespaces = [] 6632 6633 while True: 6634 if self._match(TokenType.DEFAULT): 6635 uri = self._parse_string() 6636 else: 6637 uri = self._parse_alias(self._parse_string()) 6638 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6639 if not self._match(TokenType.COMMA): 6640 break 6641 6642 return namespaces 6643 6644 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6645 args = self._parse_csv(self._parse_assignment) 6646 6647 if len(args) < 3: 6648 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6649 6650 return self.expression(exp.DecodeCase, expressions=args) 6651 6652 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6653 self._match_text_seq("KEY") 6654 key = self._parse_column() 6655 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6656 self._match_text_seq("VALUE") 6657 value = self._parse_bitwise() 6658 6659 if not key and not value: 6660 return None 6661 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6662 6663 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6664 if not this or not self._match_text_seq("FORMAT", "JSON"): 6665 return this 6666 6667 return self.expression(exp.FormatJson, this=this) 6668 6669 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6670 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6671 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6672 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6673 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6674 else: 6675 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6676 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6677 6678 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6679 6680 if not empty and not error and not null: 6681 return None 6682 6683 return self.expression( 6684 exp.OnCondition, 6685 empty=empty, 6686 error=error, 6687 null=null, 6688 ) 6689 6690 def _parse_on_handling( 6691 self, on: str, *values: str 6692 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6693 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6694 for value in values: 6695 if self._match_text_seq(value, "ON", on): 6696 return f"{value} ON {on}" 6697 6698 index = self._index 6699 if self._match(TokenType.DEFAULT): 6700 default_value = self._parse_bitwise() 6701 if self._match_text_seq("ON", on): 6702 return default_value 6703 6704 self._retreat(index) 6705 6706 return None 6707 6708 @t.overload 6709 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6710 6711 @t.overload 6712 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6713 6714 def _parse_json_object(self, agg=False): 6715 star = self._parse_star() 6716 expressions = ( 6717 [star] 6718 if star 6719 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6720 ) 6721 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6722 6723 unique_keys = None 6724 if self._match_text_seq("WITH", "UNIQUE"): 6725 unique_keys = True 6726 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6727 unique_keys = False 6728 6729 self._match_text_seq("KEYS") 6730 6731 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6732 self._parse_type() 6733 ) 6734 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6735 6736 return self.expression( 6737 exp.JSONObjectAgg if agg else exp.JSONObject, 6738 expressions=expressions, 6739 null_handling=null_handling, 6740 unique_keys=unique_keys, 6741 return_type=return_type, 6742 encoding=encoding, 6743 ) 6744 6745 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6746 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6747 if not self._match_text_seq("NESTED"): 6748 this = self._parse_id_var() 6749 kind = self._parse_types(allow_identifiers=False) 6750 nested = None 6751 else: 6752 this = None 6753 kind = None 6754 nested = True 6755 6756 path = self._match_text_seq("PATH") and self._parse_string() 6757 nested_schema = nested and self._parse_json_schema() 6758 6759 return self.expression( 6760 exp.JSONColumnDef, 6761 this=this, 6762 kind=kind, 6763 path=path, 6764 nested_schema=nested_schema, 6765 ) 6766 6767 def _parse_json_schema(self) -> exp.JSONSchema: 6768 self._match_text_seq("COLUMNS") 6769 return self.expression( 6770 exp.JSONSchema, 6771 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6772 ) 6773 6774 def _parse_json_table(self) -> exp.JSONTable: 6775 this = self._parse_format_json(self._parse_bitwise()) 6776 path = self._match(TokenType.COMMA) and self._parse_string() 6777 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6778 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6779 schema = self._parse_json_schema() 6780 6781 return exp.JSONTable( 6782 this=this, 6783 schema=schema, 6784 path=path, 6785 error_handling=error_handling, 6786 empty_handling=empty_handling, 6787 ) 6788 6789 def _parse_match_against(self) -> exp.MatchAgainst: 6790 expressions = self._parse_csv(self._parse_column) 6791 6792 self._match_text_seq(")", "AGAINST", "(") 6793 6794 this = self._parse_string() 6795 6796 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6797 modifier = "IN NATURAL LANGUAGE MODE" 6798 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6799 modifier = f"{modifier} WITH QUERY EXPANSION" 6800 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6801 modifier = "IN BOOLEAN MODE" 6802 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6803 modifier = "WITH QUERY EXPANSION" 6804 else: 6805 modifier = None 6806 6807 return self.expression( 6808 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6809 ) 6810 6811 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6812 def _parse_open_json(self) -> exp.OpenJSON: 6813 this = self._parse_bitwise() 6814 path = self._match(TokenType.COMMA) and self._parse_string() 6815 6816 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6817 this = self._parse_field(any_token=True) 6818 kind = self._parse_types() 6819 path = self._parse_string() 6820 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6821 6822 return self.expression( 6823 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6824 ) 6825 6826 expressions = None 6827 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6828 self._match_l_paren() 6829 expressions = self._parse_csv(_parse_open_json_column_def) 6830 6831 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6832 6833 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6834 args = self._parse_csv(self._parse_bitwise) 6835 6836 if self._match(TokenType.IN): 6837 return self.expression( 6838 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6839 ) 6840 6841 if haystack_first: 6842 haystack = seq_get(args, 0) 6843 needle = seq_get(args, 1) 6844 else: 6845 haystack = seq_get(args, 1) 6846 needle = seq_get(args, 0) 6847 6848 return self.expression( 6849 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6850 ) 6851 6852 def _parse_predict(self) -> exp.Predict: 6853 self._match_text_seq("MODEL") 6854 this = self._parse_table() 6855 6856 self._match(TokenType.COMMA) 6857 self._match_text_seq("TABLE") 6858 6859 return self.expression( 6860 exp.Predict, 6861 this=this, 6862 expression=self._parse_table(), 6863 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6864 ) 6865 6866 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6867 args = self._parse_csv(self._parse_table) 6868 return exp.JoinHint(this=func_name.upper(), expressions=args) 6869 6870 def _parse_substring(self) -> exp.Substring: 6871 # Postgres supports the form: substring(string [from int] [for int]) 6872 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6873 6874 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6875 6876 if self._match(TokenType.FROM): 6877 args.append(self._parse_bitwise()) 6878 if self._match(TokenType.FOR): 6879 if len(args) == 1: 6880 args.append(exp.Literal.number(1)) 6881 args.append(self._parse_bitwise()) 6882 6883 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6884 6885 def _parse_trim(self) -> exp.Trim: 6886 # https://www.w3resource.com/sql/character-functions/trim.php 6887 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6888 6889 position = None 6890 collation = None 6891 expression = None 6892 6893 if self._match_texts(self.TRIM_TYPES): 6894 position = self._prev.text.upper() 6895 6896 this = self._parse_bitwise() 6897 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6898 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6899 expression = self._parse_bitwise() 6900 6901 if invert_order: 6902 this, expression = expression, this 6903 6904 if self._match(TokenType.COLLATE): 6905 collation = self._parse_bitwise() 6906 6907 return self.expression( 6908 exp.Trim, this=this, position=position, expression=expression, collation=collation 6909 ) 6910 6911 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6912 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6913 6914 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6915 return self._parse_window(self._parse_id_var(), alias=True) 6916 6917 def _parse_respect_or_ignore_nulls( 6918 self, this: t.Optional[exp.Expression] 6919 ) -> t.Optional[exp.Expression]: 6920 if self._match_text_seq("IGNORE", "NULLS"): 6921 return self.expression(exp.IgnoreNulls, this=this) 6922 if self._match_text_seq("RESPECT", "NULLS"): 6923 return self.expression(exp.RespectNulls, this=this) 6924 return this 6925 6926 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6927 if self._match(TokenType.HAVING): 6928 self._match_texts(("MAX", "MIN")) 6929 max = self._prev.text.upper() != "MIN" 6930 return self.expression( 6931 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6932 ) 6933 6934 return this 6935 6936 def _parse_window( 6937 self, this: t.Optional[exp.Expression], alias: bool = False 6938 ) -> t.Optional[exp.Expression]: 6939 func = this 6940 comments = func.comments if isinstance(func, exp.Expression) else None 6941 6942 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6943 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6944 if self._match_text_seq("WITHIN", "GROUP"): 6945 order = self._parse_wrapped(self._parse_order) 6946 this = self.expression(exp.WithinGroup, this=this, expression=order) 6947 6948 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6949 self._match(TokenType.WHERE) 6950 this = self.expression( 6951 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6952 ) 6953 self._match_r_paren() 6954 6955 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6956 # Some dialects choose to implement and some do not. 6957 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6958 6959 # There is some code above in _parse_lambda that handles 6960 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6961 6962 # The below changes handle 6963 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6964 6965 # Oracle allows both formats 6966 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6967 # and Snowflake chose to do the same for familiarity 6968 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6969 if isinstance(this, exp.AggFunc): 6970 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6971 6972 if ignore_respect and ignore_respect is not this: 6973 ignore_respect.replace(ignore_respect.this) 6974 this = self.expression(ignore_respect.__class__, this=this) 6975 6976 this = self._parse_respect_or_ignore_nulls(this) 6977 6978 # bigquery select from window x AS (partition by ...) 6979 if alias: 6980 over = None 6981 self._match(TokenType.ALIAS) 6982 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6983 return this 6984 else: 6985 over = self._prev.text.upper() 6986 6987 if comments and isinstance(func, exp.Expression): 6988 func.pop_comments() 6989 6990 if not self._match(TokenType.L_PAREN): 6991 return self.expression( 6992 exp.Window, 6993 comments=comments, 6994 this=this, 6995 alias=self._parse_id_var(False), 6996 over=over, 6997 ) 6998 6999 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7000 7001 first = self._match(TokenType.FIRST) 7002 if self._match_text_seq("LAST"): 7003 first = False 7004 7005 partition, order = self._parse_partition_and_order() 7006 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7007 7008 if kind: 7009 self._match(TokenType.BETWEEN) 7010 start = self._parse_window_spec() 7011 self._match(TokenType.AND) 7012 end = self._parse_window_spec() 7013 exclude = ( 7014 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7015 if self._match_text_seq("EXCLUDE") 7016 else None 7017 ) 7018 7019 spec = self.expression( 7020 exp.WindowSpec, 7021 kind=kind, 7022 start=start["value"], 7023 start_side=start["side"], 7024 end=end["value"], 7025 end_side=end["side"], 7026 exclude=exclude, 7027 ) 7028 else: 7029 spec = None 7030 7031 self._match_r_paren() 7032 7033 window = self.expression( 7034 exp.Window, 7035 comments=comments, 7036 this=this, 7037 partition_by=partition, 7038 order=order, 7039 spec=spec, 7040 alias=window_alias, 7041 over=over, 7042 first=first, 7043 ) 7044 7045 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 7046 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7047 return self._parse_window(window, alias=alias) 7048 7049 return window 7050 7051 def _parse_partition_and_order( 7052 self, 7053 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7054 return self._parse_partition_by(), self._parse_order() 7055 7056 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7057 self._match(TokenType.BETWEEN) 7058 7059 return { 7060 "value": ( 7061 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7062 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7063 or self._parse_bitwise() 7064 ), 7065 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7066 } 7067 7068 def _parse_alias( 7069 self, this: t.Optional[exp.Expression], explicit: bool = False 7070 ) -> t.Optional[exp.Expression]: 7071 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7072 # so this section tries to parse the clause version and if it fails, it treats the token 7073 # as an identifier (alias) 7074 if self._can_parse_limit_or_offset(): 7075 return this 7076 7077 any_token = self._match(TokenType.ALIAS) 7078 comments = self._prev_comments or [] 7079 7080 if explicit and not any_token: 7081 return this 7082 7083 if self._match(TokenType.L_PAREN): 7084 aliases = self.expression( 7085 exp.Aliases, 7086 comments=comments, 7087 this=this, 7088 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7089 ) 7090 self._match_r_paren(aliases) 7091 return aliases 7092 7093 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7094 self.STRING_ALIASES and self._parse_string_as_identifier() 7095 ) 7096 7097 if alias: 7098 comments.extend(alias.pop_comments()) 7099 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7100 column = this.this 7101 7102 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7103 if not this.comments and column and column.comments: 7104 this.comments = column.pop_comments() 7105 7106 return this 7107 7108 def _parse_id_var( 7109 self, 7110 any_token: bool = True, 7111 tokens: t.Optional[t.Collection[TokenType]] = None, 7112 ) -> t.Optional[exp.Expression]: 7113 expression = self._parse_identifier() 7114 if not expression and ( 7115 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7116 ): 7117 quoted = self._prev.token_type == TokenType.STRING 7118 expression = self._identifier_expression(quoted=quoted) 7119 7120 return expression 7121 7122 def _parse_string(self) -> t.Optional[exp.Expression]: 7123 if self._match_set(self.STRING_PARSERS): 7124 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7125 return self._parse_placeholder() 7126 7127 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7128 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7129 if output: 7130 output.update_positions(self._prev) 7131 return output 7132 7133 def _parse_number(self) -> t.Optional[exp.Expression]: 7134 if self._match_set(self.NUMERIC_PARSERS): 7135 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7136 return self._parse_placeholder() 7137 7138 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7139 if self._match(TokenType.IDENTIFIER): 7140 return self._identifier_expression(quoted=True) 7141 return self._parse_placeholder() 7142 7143 def _parse_var( 7144 self, 7145 any_token: bool = False, 7146 tokens: t.Optional[t.Collection[TokenType]] = None, 7147 upper: bool = False, 7148 ) -> t.Optional[exp.Expression]: 7149 if ( 7150 (any_token and self._advance_any()) 7151 or self._match(TokenType.VAR) 7152 or (self._match_set(tokens) if tokens else False) 7153 ): 7154 return self.expression( 7155 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7156 ) 7157 return self._parse_placeholder() 7158 7159 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7160 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7161 self._advance() 7162 return self._prev 7163 return None 7164 7165 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7166 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7167 7168 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7169 return self._parse_primary() or self._parse_var(any_token=True) 7170 7171 def _parse_null(self) -> t.Optional[exp.Expression]: 7172 if self._match_set(self.NULL_TOKENS): 7173 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7174 return self._parse_placeholder() 7175 7176 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7177 if self._match(TokenType.TRUE): 7178 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7179 if self._match(TokenType.FALSE): 7180 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7181 return self._parse_placeholder() 7182 7183 def _parse_star(self) -> t.Optional[exp.Expression]: 7184 if self._match(TokenType.STAR): 7185 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7186 return self._parse_placeholder() 7187 7188 def _parse_parameter(self) -> exp.Parameter: 7189 this = self._parse_identifier() or self._parse_primary_or_var() 7190 return self.expression(exp.Parameter, this=this) 7191 7192 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7193 if self._match_set(self.PLACEHOLDER_PARSERS): 7194 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7195 if placeholder: 7196 return placeholder 7197 self._advance(-1) 7198 return None 7199 7200 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7201 if not self._match_texts(keywords): 7202 return None 7203 if self._match(TokenType.L_PAREN, advance=False): 7204 return self._parse_wrapped_csv(self._parse_expression) 7205 7206 expression = self._parse_expression() 7207 return [expression] if expression else None 7208 7209 def _parse_csv( 7210 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7211 ) -> t.List[exp.Expression]: 7212 parse_result = parse_method() 7213 items = [parse_result] if parse_result is not None else [] 7214 7215 while self._match(sep): 7216 self._add_comments(parse_result) 7217 parse_result = parse_method() 7218 if parse_result is not None: 7219 items.append(parse_result) 7220 7221 return items 7222 7223 def _parse_tokens( 7224 self, parse_method: t.Callable, expressions: t.Dict 7225 ) -> t.Optional[exp.Expression]: 7226 this = parse_method() 7227 7228 while self._match_set(expressions): 7229 this = self.expression( 7230 expressions[self._prev.token_type], 7231 this=this, 7232 comments=self._prev_comments, 7233 expression=parse_method(), 7234 ) 7235 7236 return this 7237 7238 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7239 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7240 7241 def _parse_wrapped_csv( 7242 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7243 ) -> t.List[exp.Expression]: 7244 return self._parse_wrapped( 7245 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7246 ) 7247 7248 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7249 wrapped = self._match(TokenType.L_PAREN) 7250 if not wrapped and not optional: 7251 self.raise_error("Expecting (") 7252 parse_result = parse_method() 7253 if wrapped: 7254 self._match_r_paren() 7255 return parse_result 7256 7257 def _parse_expressions(self) -> t.List[exp.Expression]: 7258 return self._parse_csv(self._parse_expression) 7259 7260 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7261 return self._parse_select() or self._parse_set_operations( 7262 self._parse_alias(self._parse_assignment(), explicit=True) 7263 if alias 7264 else self._parse_assignment() 7265 ) 7266 7267 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7268 return self._parse_query_modifiers( 7269 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7270 ) 7271 7272 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7273 this = None 7274 if self._match_texts(self.TRANSACTION_KIND): 7275 this = self._prev.text 7276 7277 self._match_texts(("TRANSACTION", "WORK")) 7278 7279 modes = [] 7280 while True: 7281 mode = [] 7282 while self._match(TokenType.VAR): 7283 mode.append(self._prev.text) 7284 7285 if mode: 7286 modes.append(" ".join(mode)) 7287 if not self._match(TokenType.COMMA): 7288 break 7289 7290 return self.expression(exp.Transaction, this=this, modes=modes) 7291 7292 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7293 chain = None 7294 savepoint = None 7295 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7296 7297 self._match_texts(("TRANSACTION", "WORK")) 7298 7299 if self._match_text_seq("TO"): 7300 self._match_text_seq("SAVEPOINT") 7301 savepoint = self._parse_id_var() 7302 7303 if self._match(TokenType.AND): 7304 chain = not self._match_text_seq("NO") 7305 self._match_text_seq("CHAIN") 7306 7307 if is_rollback: 7308 return self.expression(exp.Rollback, savepoint=savepoint) 7309 7310 return self.expression(exp.Commit, chain=chain) 7311 7312 def _parse_refresh(self) -> exp.Refresh: 7313 self._match(TokenType.TABLE) 7314 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7315 7316 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7317 if not self._prev.text.upper() == "ADD": 7318 return None 7319 7320 start = self._index 7321 self._match(TokenType.COLUMN) 7322 7323 exists_column = self._parse_exists(not_=True) 7324 expression = self._parse_field_def() 7325 7326 if not isinstance(expression, exp.ColumnDef): 7327 self._retreat(start) 7328 return None 7329 7330 expression.set("exists", exists_column) 7331 7332 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7333 if self._match_texts(("FIRST", "AFTER")): 7334 position = self._prev.text 7335 column_position = self.expression( 7336 exp.ColumnPosition, this=self._parse_column(), position=position 7337 ) 7338 expression.set("position", column_position) 7339 7340 return expression 7341 7342 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7343 drop = self._match(TokenType.DROP) and self._parse_drop() 7344 if drop and not isinstance(drop, exp.Command): 7345 drop.set("kind", drop.args.get("kind", "COLUMN")) 7346 return drop 7347 7348 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7349 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7350 return self.expression( 7351 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7352 ) 7353 7354 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7355 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7356 self._match_text_seq("ADD") 7357 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7358 return self.expression( 7359 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7360 ) 7361 7362 column_def = self._parse_add_column() 7363 if isinstance(column_def, exp.ColumnDef): 7364 return column_def 7365 7366 exists = self._parse_exists(not_=True) 7367 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7368 return self.expression( 7369 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7370 ) 7371 7372 return None 7373 7374 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7375 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7376 or self._match_text_seq("COLUMNS") 7377 ): 7378 self._match(TokenType.COLUMN) 7379 7380 schema = self._parse_schema() 7381 7382 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7383 7384 return self._parse_csv(_parse_add_alteration) 7385 7386 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7387 if self._match_texts(self.ALTER_ALTER_PARSERS): 7388 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7389 7390 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7391 # keyword after ALTER we default to parsing this statement 7392 self._match(TokenType.COLUMN) 7393 column = self._parse_field(any_token=True) 7394 7395 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7396 return self.expression(exp.AlterColumn, this=column, drop=True) 7397 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7398 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7399 if self._match(TokenType.COMMENT): 7400 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7401 if self._match_text_seq("DROP", "NOT", "NULL"): 7402 return self.expression( 7403 exp.AlterColumn, 7404 this=column, 7405 drop=True, 7406 allow_null=True, 7407 ) 7408 if self._match_text_seq("SET", "NOT", "NULL"): 7409 return self.expression( 7410 exp.AlterColumn, 7411 this=column, 7412 allow_null=False, 7413 ) 7414 7415 if self._match_text_seq("SET", "VISIBLE"): 7416 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7417 if self._match_text_seq("SET", "INVISIBLE"): 7418 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7419 7420 self._match_text_seq("SET", "DATA") 7421 self._match_text_seq("TYPE") 7422 return self.expression( 7423 exp.AlterColumn, 7424 this=column, 7425 dtype=self._parse_types(), 7426 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7427 using=self._match(TokenType.USING) and self._parse_assignment(), 7428 ) 7429 7430 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7431 if self._match_texts(("ALL", "EVEN", "AUTO")): 7432 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7433 7434 self._match_text_seq("KEY", "DISTKEY") 7435 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7436 7437 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7438 if compound: 7439 self._match_text_seq("SORTKEY") 7440 7441 if self._match(TokenType.L_PAREN, advance=False): 7442 return self.expression( 7443 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7444 ) 7445 7446 self._match_texts(("AUTO", "NONE")) 7447 return self.expression( 7448 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7449 ) 7450 7451 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7452 index = self._index - 1 7453 7454 partition_exists = self._parse_exists() 7455 if self._match(TokenType.PARTITION, advance=False): 7456 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7457 7458 self._retreat(index) 7459 return self._parse_csv(self._parse_drop_column) 7460 7461 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7462 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7463 exists = self._parse_exists() 7464 old_column = self._parse_column() 7465 to = self._match_text_seq("TO") 7466 new_column = self._parse_column() 7467 7468 if old_column is None or to is None or new_column is None: 7469 return None 7470 7471 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7472 7473 self._match_text_seq("TO") 7474 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7475 7476 def _parse_alter_table_set(self) -> exp.AlterSet: 7477 alter_set = self.expression(exp.AlterSet) 7478 7479 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7480 "TABLE", "PROPERTIES" 7481 ): 7482 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7483 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7484 alter_set.set("expressions", [self._parse_assignment()]) 7485 elif self._match_texts(("LOGGED", "UNLOGGED")): 7486 alter_set.set("option", exp.var(self._prev.text.upper())) 7487 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7488 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7489 elif self._match_text_seq("LOCATION"): 7490 alter_set.set("location", self._parse_field()) 7491 elif self._match_text_seq("ACCESS", "METHOD"): 7492 alter_set.set("access_method", self._parse_field()) 7493 elif self._match_text_seq("TABLESPACE"): 7494 alter_set.set("tablespace", self._parse_field()) 7495 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7496 alter_set.set("file_format", [self._parse_field()]) 7497 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7498 alter_set.set("file_format", self._parse_wrapped_options()) 7499 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7500 alter_set.set("copy_options", self._parse_wrapped_options()) 7501 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7502 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7503 else: 7504 if self._match_text_seq("SERDE"): 7505 alter_set.set("serde", self._parse_field()) 7506 7507 properties = self._parse_wrapped(self._parse_properties, optional=True) 7508 alter_set.set("expressions", [properties]) 7509 7510 return alter_set 7511 7512 def _parse_alter(self) -> exp.Alter | exp.Command: 7513 start = self._prev 7514 7515 alter_token = self._match_set(self.ALTERABLES) and self._prev 7516 if not alter_token: 7517 return self._parse_as_command(start) 7518 7519 exists = self._parse_exists() 7520 only = self._match_text_seq("ONLY") 7521 this = self._parse_table(schema=True) 7522 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7523 7524 if self._next: 7525 self._advance() 7526 7527 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7528 if parser: 7529 actions = ensure_list(parser(self)) 7530 not_valid = self._match_text_seq("NOT", "VALID") 7531 options = self._parse_csv(self._parse_property) 7532 7533 if not self._curr and actions: 7534 return self.expression( 7535 exp.Alter, 7536 this=this, 7537 kind=alter_token.text.upper(), 7538 exists=exists, 7539 actions=actions, 7540 only=only, 7541 options=options, 7542 cluster=cluster, 7543 not_valid=not_valid, 7544 ) 7545 7546 return self._parse_as_command(start) 7547 7548 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7549 start = self._prev 7550 # https://duckdb.org/docs/sql/statements/analyze 7551 if not self._curr: 7552 return self.expression(exp.Analyze) 7553 7554 options = [] 7555 while self._match_texts(self.ANALYZE_STYLES): 7556 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7557 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7558 else: 7559 options.append(self._prev.text.upper()) 7560 7561 this: t.Optional[exp.Expression] = None 7562 inner_expression: t.Optional[exp.Expression] = None 7563 7564 kind = self._curr and self._curr.text.upper() 7565 7566 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7567 this = self._parse_table_parts() 7568 elif self._match_text_seq("TABLES"): 7569 if self._match_set((TokenType.FROM, TokenType.IN)): 7570 kind = f"{kind} {self._prev.text.upper()}" 7571 this = self._parse_table(schema=True, is_db_reference=True) 7572 elif self._match_text_seq("DATABASE"): 7573 this = self._parse_table(schema=True, is_db_reference=True) 7574 elif self._match_text_seq("CLUSTER"): 7575 this = self._parse_table() 7576 # Try matching inner expr keywords before fallback to parse table. 7577 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7578 kind = None 7579 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7580 else: 7581 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7582 kind = None 7583 this = self._parse_table_parts() 7584 7585 partition = self._try_parse(self._parse_partition) 7586 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7587 return self._parse_as_command(start) 7588 7589 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7590 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7591 "WITH", "ASYNC", "MODE" 7592 ): 7593 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7594 else: 7595 mode = None 7596 7597 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7598 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7599 7600 properties = self._parse_properties() 7601 return self.expression( 7602 exp.Analyze, 7603 kind=kind, 7604 this=this, 7605 mode=mode, 7606 partition=partition, 7607 properties=properties, 7608 expression=inner_expression, 7609 options=options, 7610 ) 7611 7612 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7613 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7614 this = None 7615 kind = self._prev.text.upper() 7616 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7617 expressions = [] 7618 7619 if not self._match_text_seq("STATISTICS"): 7620 self.raise_error("Expecting token STATISTICS") 7621 7622 if self._match_text_seq("NOSCAN"): 7623 this = "NOSCAN" 7624 elif self._match(TokenType.FOR): 7625 if self._match_text_seq("ALL", "COLUMNS"): 7626 this = "FOR ALL COLUMNS" 7627 if self._match_texts("COLUMNS"): 7628 this = "FOR COLUMNS" 7629 expressions = self._parse_csv(self._parse_column_reference) 7630 elif self._match_text_seq("SAMPLE"): 7631 sample = self._parse_number() 7632 expressions = [ 7633 self.expression( 7634 exp.AnalyzeSample, 7635 sample=sample, 7636 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7637 ) 7638 ] 7639 7640 return self.expression( 7641 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7642 ) 7643 7644 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7645 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7646 kind = None 7647 this = None 7648 expression: t.Optional[exp.Expression] = None 7649 if self._match_text_seq("REF", "UPDATE"): 7650 kind = "REF" 7651 this = "UPDATE" 7652 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7653 this = "UPDATE SET DANGLING TO NULL" 7654 elif self._match_text_seq("STRUCTURE"): 7655 kind = "STRUCTURE" 7656 if self._match_text_seq("CASCADE", "FAST"): 7657 this = "CASCADE FAST" 7658 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7659 ("ONLINE", "OFFLINE") 7660 ): 7661 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7662 expression = self._parse_into() 7663 7664 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7665 7666 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7667 this = self._prev.text.upper() 7668 if self._match_text_seq("COLUMNS"): 7669 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7670 return None 7671 7672 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7673 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7674 if self._match_text_seq("STATISTICS"): 7675 return self.expression(exp.AnalyzeDelete, kind=kind) 7676 return None 7677 7678 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7679 if self._match_text_seq("CHAINED", "ROWS"): 7680 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7681 return None 7682 7683 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7684 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7685 this = self._prev.text.upper() 7686 expression: t.Optional[exp.Expression] = None 7687 expressions = [] 7688 update_options = None 7689 7690 if self._match_text_seq("HISTOGRAM", "ON"): 7691 expressions = self._parse_csv(self._parse_column_reference) 7692 with_expressions = [] 7693 while self._match(TokenType.WITH): 7694 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7695 if self._match_texts(("SYNC", "ASYNC")): 7696 if self._match_text_seq("MODE", advance=False): 7697 with_expressions.append(f"{self._prev.text.upper()} MODE") 7698 self._advance() 7699 else: 7700 buckets = self._parse_number() 7701 if self._match_text_seq("BUCKETS"): 7702 with_expressions.append(f"{buckets} BUCKETS") 7703 if with_expressions: 7704 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7705 7706 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7707 TokenType.UPDATE, advance=False 7708 ): 7709 update_options = self._prev.text.upper() 7710 self._advance() 7711 elif self._match_text_seq("USING", "DATA"): 7712 expression = self.expression(exp.UsingData, this=self._parse_string()) 7713 7714 return self.expression( 7715 exp.AnalyzeHistogram, 7716 this=this, 7717 expressions=expressions, 7718 expression=expression, 7719 update_options=update_options, 7720 ) 7721 7722 def _parse_merge(self) -> exp.Merge: 7723 self._match(TokenType.INTO) 7724 target = self._parse_table() 7725 7726 if target and self._match(TokenType.ALIAS, advance=False): 7727 target.set("alias", self._parse_table_alias()) 7728 7729 self._match(TokenType.USING) 7730 using = self._parse_table() 7731 7732 self._match(TokenType.ON) 7733 on = self._parse_assignment() 7734 7735 return self.expression( 7736 exp.Merge, 7737 this=target, 7738 using=using, 7739 on=on, 7740 whens=self._parse_when_matched(), 7741 returning=self._parse_returning(), 7742 ) 7743 7744 def _parse_when_matched(self) -> exp.Whens: 7745 whens = [] 7746 7747 while self._match(TokenType.WHEN): 7748 matched = not self._match(TokenType.NOT) 7749 self._match_text_seq("MATCHED") 7750 source = ( 7751 False 7752 if self._match_text_seq("BY", "TARGET") 7753 else self._match_text_seq("BY", "SOURCE") 7754 ) 7755 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7756 7757 self._match(TokenType.THEN) 7758 7759 if self._match(TokenType.INSERT): 7760 this = self._parse_star() 7761 if this: 7762 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7763 else: 7764 then = self.expression( 7765 exp.Insert, 7766 this=exp.var("ROW") 7767 if self._match_text_seq("ROW") 7768 else self._parse_value(values=False), 7769 expression=self._match_text_seq("VALUES") and self._parse_value(), 7770 ) 7771 elif self._match(TokenType.UPDATE): 7772 expressions = self._parse_star() 7773 if expressions: 7774 then = self.expression(exp.Update, expressions=expressions) 7775 else: 7776 then = self.expression( 7777 exp.Update, 7778 expressions=self._match(TokenType.SET) 7779 and self._parse_csv(self._parse_equality), 7780 ) 7781 elif self._match(TokenType.DELETE): 7782 then = self.expression(exp.Var, this=self._prev.text) 7783 else: 7784 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7785 7786 whens.append( 7787 self.expression( 7788 exp.When, 7789 matched=matched, 7790 source=source, 7791 condition=condition, 7792 then=then, 7793 ) 7794 ) 7795 return self.expression(exp.Whens, expressions=whens) 7796 7797 def _parse_show(self) -> t.Optional[exp.Expression]: 7798 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7799 if parser: 7800 return parser(self) 7801 return self._parse_as_command(self._prev) 7802 7803 def _parse_set_item_assignment( 7804 self, kind: t.Optional[str] = None 7805 ) -> t.Optional[exp.Expression]: 7806 index = self._index 7807 7808 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7809 return self._parse_set_transaction(global_=kind == "GLOBAL") 7810 7811 left = self._parse_primary() or self._parse_column() 7812 assignment_delimiter = self._match_texts(("=", "TO")) 7813 7814 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7815 self._retreat(index) 7816 return None 7817 7818 right = self._parse_statement() or self._parse_id_var() 7819 if isinstance(right, (exp.Column, exp.Identifier)): 7820 right = exp.var(right.name) 7821 7822 this = self.expression(exp.EQ, this=left, expression=right) 7823 return self.expression(exp.SetItem, this=this, kind=kind) 7824 7825 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7826 self._match_text_seq("TRANSACTION") 7827 characteristics = self._parse_csv( 7828 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7829 ) 7830 return self.expression( 7831 exp.SetItem, 7832 expressions=characteristics, 7833 kind="TRANSACTION", 7834 **{"global": global_}, # type: ignore 7835 ) 7836 7837 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7838 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7839 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7840 7841 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7842 index = self._index 7843 set_ = self.expression( 7844 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7845 ) 7846 7847 if self._curr: 7848 self._retreat(index) 7849 return self._parse_as_command(self._prev) 7850 7851 return set_ 7852 7853 def _parse_var_from_options( 7854 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7855 ) -> t.Optional[exp.Var]: 7856 start = self._curr 7857 if not start: 7858 return None 7859 7860 option = start.text.upper() 7861 continuations = options.get(option) 7862 7863 index = self._index 7864 self._advance() 7865 for keywords in continuations or []: 7866 if isinstance(keywords, str): 7867 keywords = (keywords,) 7868 7869 if self._match_text_seq(*keywords): 7870 option = f"{option} {' '.join(keywords)}" 7871 break 7872 else: 7873 if continuations or continuations is None: 7874 if raise_unmatched: 7875 self.raise_error(f"Unknown option {option}") 7876 7877 self._retreat(index) 7878 return None 7879 7880 return exp.var(option) 7881 7882 def _parse_as_command(self, start: Token) -> exp.Command: 7883 while self._curr: 7884 self._advance() 7885 text = self._find_sql(start, self._prev) 7886 size = len(start.text) 7887 self._warn_unsupported() 7888 return exp.Command(this=text[:size], expression=text[size:]) 7889 7890 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7891 settings = [] 7892 7893 self._match_l_paren() 7894 kind = self._parse_id_var() 7895 7896 if self._match(TokenType.L_PAREN): 7897 while True: 7898 key = self._parse_id_var() 7899 value = self._parse_primary() 7900 if not key and value is None: 7901 break 7902 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7903 self._match(TokenType.R_PAREN) 7904 7905 self._match_r_paren() 7906 7907 return self.expression( 7908 exp.DictProperty, 7909 this=this, 7910 kind=kind.this if kind else None, 7911 settings=settings, 7912 ) 7913 7914 def _parse_dict_range(self, this: str) -> exp.DictRange: 7915 self._match_l_paren() 7916 has_min = self._match_text_seq("MIN") 7917 if has_min: 7918 min = self._parse_var() or self._parse_primary() 7919 self._match_text_seq("MAX") 7920 max = self._parse_var() or self._parse_primary() 7921 else: 7922 max = self._parse_var() or self._parse_primary() 7923 min = exp.Literal.number(0) 7924 self._match_r_paren() 7925 return self.expression(exp.DictRange, this=this, min=min, max=max) 7926 7927 def _parse_comprehension( 7928 self, this: t.Optional[exp.Expression] 7929 ) -> t.Optional[exp.Comprehension]: 7930 index = self._index 7931 expression = self._parse_column() 7932 if not self._match(TokenType.IN): 7933 self._retreat(index - 1) 7934 return None 7935 iterator = self._parse_column() 7936 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7937 return self.expression( 7938 exp.Comprehension, 7939 this=this, 7940 expression=expression, 7941 iterator=iterator, 7942 condition=condition, 7943 ) 7944 7945 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7946 if self._match(TokenType.HEREDOC_STRING): 7947 return self.expression(exp.Heredoc, this=self._prev.text) 7948 7949 if not self._match_text_seq("$"): 7950 return None 7951 7952 tags = ["$"] 7953 tag_text = None 7954 7955 if self._is_connected(): 7956 self._advance() 7957 tags.append(self._prev.text.upper()) 7958 else: 7959 self.raise_error("No closing $ found") 7960 7961 if tags[-1] != "$": 7962 if self._is_connected() and self._match_text_seq("$"): 7963 tag_text = tags[-1] 7964 tags.append("$") 7965 else: 7966 self.raise_error("No closing $ found") 7967 7968 heredoc_start = self._curr 7969 7970 while self._curr: 7971 if self._match_text_seq(*tags, advance=False): 7972 this = self._find_sql(heredoc_start, self._prev) 7973 self._advance(len(tags)) 7974 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7975 7976 self._advance() 7977 7978 self.raise_error(f"No closing {''.join(tags)} found") 7979 return None 7980 7981 def _find_parser( 7982 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7983 ) -> t.Optional[t.Callable]: 7984 if not self._curr: 7985 return None 7986 7987 index = self._index 7988 this = [] 7989 while True: 7990 # The current token might be multiple words 7991 curr = self._curr.text.upper() 7992 key = curr.split(" ") 7993 this.append(curr) 7994 7995 self._advance() 7996 result, trie = in_trie(trie, key) 7997 if result == TrieResult.FAILED: 7998 break 7999 8000 if result == TrieResult.EXISTS: 8001 subparser = parsers[" ".join(this)] 8002 return subparser 8003 8004 self._retreat(index) 8005 return None 8006 8007 def _match(self, token_type, advance=True, expression=None): 8008 if not self._curr: 8009 return None 8010 8011 if self._curr.token_type == token_type: 8012 if advance: 8013 self._advance() 8014 self._add_comments(expression) 8015 return True 8016 8017 return None 8018 8019 def _match_set(self, types, advance=True): 8020 if not self._curr: 8021 return None 8022 8023 if self._curr.token_type in types: 8024 if advance: 8025 self._advance() 8026 return True 8027 8028 return None 8029 8030 def _match_pair(self, token_type_a, token_type_b, advance=True): 8031 if not self._curr or not self._next: 8032 return None 8033 8034 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8035 if advance: 8036 self._advance(2) 8037 return True 8038 8039 return None 8040 8041 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8042 if not self._match(TokenType.L_PAREN, expression=expression): 8043 self.raise_error("Expecting (") 8044 8045 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8046 if not self._match(TokenType.R_PAREN, expression=expression): 8047 self.raise_error("Expecting )") 8048 8049 def _match_texts(self, texts, advance=True): 8050 if ( 8051 self._curr 8052 and self._curr.token_type != TokenType.STRING 8053 and self._curr.text.upper() in texts 8054 ): 8055 if advance: 8056 self._advance() 8057 return True 8058 return None 8059 8060 def _match_text_seq(self, *texts, advance=True): 8061 index = self._index 8062 for text in texts: 8063 if ( 8064 self._curr 8065 and self._curr.token_type != TokenType.STRING 8066 and self._curr.text.upper() == text 8067 ): 8068 self._advance() 8069 else: 8070 self._retreat(index) 8071 return None 8072 8073 if not advance: 8074 self._retreat(index) 8075 8076 return True 8077 8078 def _replace_lambda( 8079 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8080 ) -> t.Optional[exp.Expression]: 8081 if not node: 8082 return node 8083 8084 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8085 8086 for column in node.find_all(exp.Column): 8087 typ = lambda_types.get(column.parts[0].name) 8088 if typ is not None: 8089 dot_or_id = column.to_dot() if column.table else column.this 8090 8091 if typ: 8092 dot_or_id = self.expression( 8093 exp.Cast, 8094 this=dot_or_id, 8095 to=typ, 8096 ) 8097 8098 parent = column.parent 8099 8100 while isinstance(parent, exp.Dot): 8101 if not isinstance(parent.parent, exp.Dot): 8102 parent.replace(dot_or_id) 8103 break 8104 parent = parent.parent 8105 else: 8106 if column is node: 8107 node = dot_or_id 8108 else: 8109 column.replace(dot_or_id) 8110 return node 8111 8112 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8113 start = self._prev 8114 8115 # Not to be confused with TRUNCATE(number, decimals) function call 8116 if self._match(TokenType.L_PAREN): 8117 self._retreat(self._index - 2) 8118 return self._parse_function() 8119 8120 # Clickhouse supports TRUNCATE DATABASE as well 8121 is_database = self._match(TokenType.DATABASE) 8122 8123 self._match(TokenType.TABLE) 8124 8125 exists = self._parse_exists(not_=False) 8126 8127 expressions = self._parse_csv( 8128 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8129 ) 8130 8131 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8132 8133 if self._match_text_seq("RESTART", "IDENTITY"): 8134 identity = "RESTART" 8135 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8136 identity = "CONTINUE" 8137 else: 8138 identity = None 8139 8140 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8141 option = self._prev.text 8142 else: 8143 option = None 8144 8145 partition = self._parse_partition() 8146 8147 # Fallback case 8148 if self._curr: 8149 return self._parse_as_command(start) 8150 8151 return self.expression( 8152 exp.TruncateTable, 8153 expressions=expressions, 8154 is_database=is_database, 8155 exists=exists, 8156 cluster=cluster, 8157 identity=identity, 8158 option=option, 8159 partition=partition, 8160 ) 8161 8162 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8163 this = self._parse_ordered(self._parse_opclass) 8164 8165 if not self._match(TokenType.WITH): 8166 return this 8167 8168 op = self._parse_var(any_token=True) 8169 8170 return self.expression(exp.WithOperator, this=this, op=op) 8171 8172 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8173 self._match(TokenType.EQ) 8174 self._match(TokenType.L_PAREN) 8175 8176 opts: t.List[t.Optional[exp.Expression]] = [] 8177 option: exp.Expression | None 8178 while self._curr and not self._match(TokenType.R_PAREN): 8179 if self._match_text_seq("FORMAT_NAME", "="): 8180 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8181 option = self._parse_format_name() 8182 else: 8183 option = self._parse_property() 8184 8185 if option is None: 8186 self.raise_error("Unable to parse option") 8187 break 8188 8189 opts.append(option) 8190 8191 return opts 8192 8193 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8194 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8195 8196 options = [] 8197 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8198 option = self._parse_var(any_token=True) 8199 prev = self._prev.text.upper() 8200 8201 # Different dialects might separate options and values by white space, "=" and "AS" 8202 self._match(TokenType.EQ) 8203 self._match(TokenType.ALIAS) 8204 8205 param = self.expression(exp.CopyParameter, this=option) 8206 8207 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8208 TokenType.L_PAREN, advance=False 8209 ): 8210 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8211 param.set("expressions", self._parse_wrapped_options()) 8212 elif prev == "FILE_FORMAT": 8213 # T-SQL's external file format case 8214 param.set("expression", self._parse_field()) 8215 else: 8216 param.set("expression", self._parse_unquoted_field()) 8217 8218 options.append(param) 8219 self._match(sep) 8220 8221 return options 8222 8223 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8224 expr = self.expression(exp.Credentials) 8225 8226 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8227 expr.set("storage", self._parse_field()) 8228 if self._match_text_seq("CREDENTIALS"): 8229 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8230 creds = ( 8231 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8232 ) 8233 expr.set("credentials", creds) 8234 if self._match_text_seq("ENCRYPTION"): 8235 expr.set("encryption", self._parse_wrapped_options()) 8236 if self._match_text_seq("IAM_ROLE"): 8237 expr.set("iam_role", self._parse_field()) 8238 if self._match_text_seq("REGION"): 8239 expr.set("region", self._parse_field()) 8240 8241 return expr 8242 8243 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8244 return self._parse_field() 8245 8246 def _parse_copy(self) -> exp.Copy | exp.Command: 8247 start = self._prev 8248 8249 self._match(TokenType.INTO) 8250 8251 this = ( 8252 self._parse_select(nested=True, parse_subquery_alias=False) 8253 if self._match(TokenType.L_PAREN, advance=False) 8254 else self._parse_table(schema=True) 8255 ) 8256 8257 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8258 8259 files = self._parse_csv(self._parse_file_location) 8260 credentials = self._parse_credentials() 8261 8262 self._match_text_seq("WITH") 8263 8264 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8265 8266 # Fallback case 8267 if self._curr: 8268 return self._parse_as_command(start) 8269 8270 return self.expression( 8271 exp.Copy, 8272 this=this, 8273 kind=kind, 8274 credentials=credentials, 8275 files=files, 8276 params=params, 8277 ) 8278 8279 def _parse_normalize(self) -> exp.Normalize: 8280 return self.expression( 8281 exp.Normalize, 8282 this=self._parse_bitwise(), 8283 form=self._match(TokenType.COMMA) and self._parse_var(), 8284 ) 8285 8286 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8287 args = self._parse_csv(lambda: self._parse_lambda()) 8288 8289 this = seq_get(args, 0) 8290 decimals = seq_get(args, 1) 8291 8292 return expr_type( 8293 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8294 ) 8295 8296 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8297 star_token = self._prev 8298 8299 if self._match_text_seq("COLUMNS", "(", advance=False): 8300 this = self._parse_function() 8301 if isinstance(this, exp.Columns): 8302 this.set("unpack", True) 8303 return this 8304 8305 return self.expression( 8306 exp.Star, 8307 **{ # type: ignore 8308 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8309 "replace": self._parse_star_op("REPLACE"), 8310 "rename": self._parse_star_op("RENAME"), 8311 }, 8312 ).update_positions(star_token) 8313 8314 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8315 privilege_parts = [] 8316 8317 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8318 # (end of privilege list) or L_PAREN (start of column list) are met 8319 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8320 privilege_parts.append(self._curr.text.upper()) 8321 self._advance() 8322 8323 this = exp.var(" ".join(privilege_parts)) 8324 expressions = ( 8325 self._parse_wrapped_csv(self._parse_column) 8326 if self._match(TokenType.L_PAREN, advance=False) 8327 else None 8328 ) 8329 8330 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8331 8332 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8333 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8334 principal = self._parse_id_var() 8335 8336 if not principal: 8337 return None 8338 8339 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8340 8341 def _parse_grant(self) -> exp.Grant | exp.Command: 8342 start = self._prev 8343 8344 privileges = self._parse_csv(self._parse_grant_privilege) 8345 8346 self._match(TokenType.ON) 8347 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8348 8349 # Attempt to parse the securable e.g. MySQL allows names 8350 # such as "foo.*", "*.*" which are not easily parseable yet 8351 securable = self._try_parse(self._parse_table_parts) 8352 8353 if not securable or not self._match_text_seq("TO"): 8354 return self._parse_as_command(start) 8355 8356 principals = self._parse_csv(self._parse_grant_principal) 8357 8358 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8359 8360 if self._curr: 8361 return self._parse_as_command(start) 8362 8363 return self.expression( 8364 exp.Grant, 8365 privileges=privileges, 8366 kind=kind, 8367 securable=securable, 8368 principals=principals, 8369 grant_option=grant_option, 8370 ) 8371 8372 def _parse_overlay(self) -> exp.Overlay: 8373 return self.expression( 8374 exp.Overlay, 8375 **{ # type: ignore 8376 "this": self._parse_bitwise(), 8377 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8378 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8379 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8380 }, 8381 ) 8382 8383 def _parse_format_name(self) -> exp.Property: 8384 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8385 # for FILE_FORMAT = <format_name> 8386 return self.expression( 8387 exp.Property, 8388 this=exp.var("FORMAT_NAME"), 8389 value=self._parse_string() or self._parse_table_parts(), 8390 ) 8391 8392 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8393 args: t.List[exp.Expression] = [] 8394 8395 if self._match(TokenType.DISTINCT): 8396 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8397 self._match(TokenType.COMMA) 8398 8399 args.extend(self._parse_csv(self._parse_assignment)) 8400 8401 return self.expression( 8402 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8403 ) 8404 8405 def _identifier_expression( 8406 self, token: t.Optional[Token] = None, **kwargs: t.Any 8407 ) -> exp.Identifier: 8408 token = token or self._prev 8409 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8410 expression.update_positions(token) 8411 return expression 8412 8413 def _build_pipe_cte( 8414 self, 8415 query: exp.Query, 8416 expressions: t.List[exp.Expression], 8417 alias_cte: t.Optional[exp.TableAlias] = None, 8418 ) -> exp.Select: 8419 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8420 if alias_cte: 8421 new_cte = alias_cte 8422 else: 8423 self._pipe_cte_counter += 1 8424 new_cte = f"__tmp{self._pipe_cte_counter}" 8425 8426 with_ = query.args.get("with") 8427 ctes = with_.pop() if with_ else None 8428 8429 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8430 if ctes: 8431 new_select.set("with", ctes) 8432 8433 return new_select.with_(new_cte, as_=query, copy=False) 8434 8435 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8436 select = self._parse_select(consume_pipe=False) 8437 if not select: 8438 return query 8439 8440 return self._build_pipe_cte( 8441 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8442 ) 8443 8444 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8445 limit = self._parse_limit() 8446 offset = self._parse_offset() 8447 if limit: 8448 curr_limit = query.args.get("limit", limit) 8449 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8450 query.limit(limit, copy=False) 8451 if offset: 8452 curr_offset = query.args.get("offset") 8453 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8454 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8455 8456 return query 8457 8458 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8459 this = self._parse_assignment() 8460 if self._match_text_seq("GROUP", "AND", advance=False): 8461 return this 8462 8463 this = self._parse_alias(this) 8464 8465 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8466 return self._parse_ordered(lambda: this) 8467 8468 return this 8469 8470 def _parse_pipe_syntax_aggregate_group_order_by( 8471 self, query: exp.Select, group_by_exists: bool = True 8472 ) -> exp.Select: 8473 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8474 aggregates_or_groups, orders = [], [] 8475 for element in expr: 8476 if isinstance(element, exp.Ordered): 8477 this = element.this 8478 if isinstance(this, exp.Alias): 8479 element.set("this", this.args["alias"]) 8480 orders.append(element) 8481 else: 8482 this = element 8483 aggregates_or_groups.append(this) 8484 8485 if group_by_exists: 8486 query.select(*aggregates_or_groups, copy=False).group_by( 8487 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8488 copy=False, 8489 ) 8490 else: 8491 query.select(*aggregates_or_groups, append=False, copy=False) 8492 8493 if orders: 8494 return query.order_by(*orders, append=False, copy=False) 8495 8496 return query 8497 8498 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8499 self._match_text_seq("AGGREGATE") 8500 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8501 8502 if self._match(TokenType.GROUP_BY) or ( 8503 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8504 ): 8505 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8506 8507 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8508 8509 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8510 first_setop = self.parse_set_operation(this=query) 8511 if not first_setop: 8512 return None 8513 8514 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8515 expr = self._parse_paren() 8516 return expr.assert_is(exp.Subquery).unnest() if expr else None 8517 8518 first_setop.this.pop() 8519 8520 setops = [ 8521 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8522 *self._parse_csv(_parse_and_unwrap_query), 8523 ] 8524 8525 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8526 with_ = query.args.get("with") 8527 ctes = with_.pop() if with_ else None 8528 8529 if isinstance(first_setop, exp.Union): 8530 query = query.union(*setops, copy=False, **first_setop.args) 8531 elif isinstance(first_setop, exp.Except): 8532 query = query.except_(*setops, copy=False, **first_setop.args) 8533 else: 8534 query = query.intersect(*setops, copy=False, **first_setop.args) 8535 8536 query.set("with", ctes) 8537 8538 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8539 8540 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8541 join = self._parse_join() 8542 if not join: 8543 return None 8544 8545 if isinstance(query, exp.Select): 8546 return query.join(join, copy=False) 8547 8548 return query 8549 8550 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8551 pivots = self._parse_pivots() 8552 if not pivots: 8553 return query 8554 8555 from_ = query.args.get("from") 8556 if from_: 8557 from_.this.set("pivots", pivots) 8558 8559 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8560 8561 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8562 self._match_text_seq("EXTEND") 8563 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8564 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8565 8566 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8567 sample = self._parse_table_sample() 8568 8569 with_ = query.args.get("with") 8570 if with_: 8571 with_.expressions[-1].this.set("sample", sample) 8572 else: 8573 query.set("sample", sample) 8574 8575 return query 8576 8577 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8578 if isinstance(query, exp.Subquery): 8579 query = exp.select("*").from_(query, copy=False) 8580 8581 if not query.args.get("from"): 8582 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8583 8584 while self._match(TokenType.PIPE_GT): 8585 start = self._curr 8586 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8587 if not parser: 8588 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8589 # keywords, making it tricky to disambiguate them without lookahead. The approach 8590 # here is to try and parse a set operation and if that fails, then try to parse a 8591 # join operator. If that fails as well, then the operator is not supported. 8592 parsed_query = self._parse_pipe_syntax_set_operator(query) 8593 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8594 if not parsed_query: 8595 self._retreat(start) 8596 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8597 break 8598 query = parsed_query 8599 else: 8600 query = parser(self, query) 8601 8602 return query
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1559 def __init__( 1560 self, 1561 error_level: t.Optional[ErrorLevel] = None, 1562 error_message_context: int = 100, 1563 max_errors: int = 3, 1564 dialect: DialectType = None, 1565 ): 1566 from sqlglot.dialects import Dialect 1567 1568 self.error_level = error_level or ErrorLevel.IMMEDIATE 1569 self.error_message_context = error_message_context 1570 self.max_errors = max_errors 1571 self.dialect = Dialect.get_or_raise(dialect) 1572 self.reset()
1585 def parse( 1586 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1587 ) -> t.List[t.Optional[exp.Expression]]: 1588 """ 1589 Parses a list of tokens and returns a list of syntax trees, one tree 1590 per parsed SQL statement. 1591 1592 Args: 1593 raw_tokens: The list of tokens. 1594 sql: The original SQL string, used to produce helpful debug messages. 1595 1596 Returns: 1597 The list of the produced syntax trees. 1598 """ 1599 return self._parse( 1600 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1601 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1603 def parse_into( 1604 self, 1605 expression_types: exp.IntoType, 1606 raw_tokens: t.List[Token], 1607 sql: t.Optional[str] = None, 1608 ) -> t.List[t.Optional[exp.Expression]]: 1609 """ 1610 Parses a list of tokens into a given Expression type. If a collection of Expression 1611 types is given instead, this method will try to parse the token list into each one 1612 of them, stopping at the first for which the parsing succeeds. 1613 1614 Args: 1615 expression_types: The expression type(s) to try and parse the token list into. 1616 raw_tokens: The list of tokens. 1617 sql: The original SQL string, used to produce helpful debug messages. 1618 1619 Returns: 1620 The target Expression. 1621 """ 1622 errors = [] 1623 for expression_type in ensure_list(expression_types): 1624 parser = self.EXPRESSION_PARSERS.get(expression_type) 1625 if not parser: 1626 raise TypeError(f"No parser registered for {expression_type}") 1627 1628 try: 1629 return self._parse(parser, raw_tokens, sql) 1630 except ParseError as e: 1631 e.errors[0]["into_expression"] = expression_type 1632 errors.append(e) 1633 1634 raise ParseError( 1635 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1636 errors=merge_errors(errors), 1637 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1677 def check_errors(self) -> None: 1678 """Logs or raises any found errors, depending on the chosen error level setting.""" 1679 if self.error_level == ErrorLevel.WARN: 1680 for error in self.errors: 1681 logger.error(str(error)) 1682 elif self.error_level == ErrorLevel.RAISE and self.errors: 1683 raise ParseError( 1684 concat_messages(self.errors, self.max_errors), 1685 errors=merge_errors(self.errors), 1686 )
Logs or raises any found errors, depending on the chosen error level setting.
1688 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1689 """ 1690 Appends an error in the list of recorded errors or raises it, depending on the chosen 1691 error level setting. 1692 """ 1693 token = token or self._curr or self._prev or Token.string("") 1694 start = token.start 1695 end = token.end + 1 1696 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1697 highlight = self.sql[start:end] 1698 end_context = self.sql[end : end + self.error_message_context] 1699 1700 error = ParseError.new( 1701 f"{message}. Line {token.line}, Col: {token.col}.\n" 1702 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1703 description=message, 1704 line=token.line, 1705 col=token.col, 1706 start_context=start_context, 1707 highlight=highlight, 1708 end_context=end_context, 1709 ) 1710 1711 if self.error_level == ErrorLevel.IMMEDIATE: 1712 raise error 1713 1714 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1716 def expression( 1717 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1718 ) -> E: 1719 """ 1720 Creates a new, validated Expression. 1721 1722 Args: 1723 exp_class: The expression class to instantiate. 1724 comments: An optional list of comments to attach to the expression. 1725 kwargs: The arguments to set for the expression along with their respective values. 1726 1727 Returns: 1728 The target expression. 1729 """ 1730 instance = exp_class(**kwargs) 1731 instance.add_comments(comments) if comments else self._add_comments(instance) 1732 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1739 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1740 """ 1741 Validates an Expression, making sure that all its mandatory arguments are set. 1742 1743 Args: 1744 expression: The expression to validate. 1745 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1746 1747 Returns: 1748 The validated expression. 1749 """ 1750 if self.error_level != ErrorLevel.IGNORE: 1751 for error_message in expression.error_messages(args): 1752 self.raise_error(error_message) 1753 1754 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
4793 def parse_set_operation( 4794 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4795 ) -> t.Optional[exp.Expression]: 4796 start = self._index 4797 _, side_token, kind_token = self._parse_join_parts() 4798 4799 side = side_token.text if side_token else None 4800 kind = kind_token.text if kind_token else None 4801 4802 if not self._match_set(self.SET_OPERATIONS): 4803 self._retreat(start) 4804 return None 4805 4806 token_type = self._prev.token_type 4807 4808 if token_type == TokenType.UNION: 4809 operation: t.Type[exp.SetOperation] = exp.Union 4810 elif token_type == TokenType.EXCEPT: 4811 operation = exp.Except 4812 else: 4813 operation = exp.Intersect 4814 4815 comments = self._prev.comments 4816 4817 if self._match(TokenType.DISTINCT): 4818 distinct: t.Optional[bool] = True 4819 elif self._match(TokenType.ALL): 4820 distinct = False 4821 else: 4822 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4823 if distinct is None: 4824 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4825 4826 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4827 "STRICT", "CORRESPONDING" 4828 ) 4829 if self._match_text_seq("CORRESPONDING"): 4830 by_name = True 4831 if not side and not kind: 4832 kind = "INNER" 4833 4834 on_column_list = None 4835 if by_name and self._match_texts(("ON", "BY")): 4836 on_column_list = self._parse_wrapped_csv(self._parse_column) 4837 4838 expression = self._parse_select( 4839 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4840 ) 4841 4842 return self.expression( 4843 operation, 4844 comments=comments, 4845 this=this, 4846 distinct=distinct, 4847 by_name=by_name, 4848 expression=expression, 4849 side=side, 4850 kind=kind, 4851 on=on_column_list, 4852 )