sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
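A minimal sketch of how these builders behave (illustrative, not part of the module). Note that build_like swaps its arguments, because the function form LIKE(pattern, value) corresponds to `value LIKE pattern`:

from sqlglot import exp
from sqlglot.parser import build_like, build_var_map

# build_like: LIKE(pattern, value) becomes value LIKE pattern
like = build_like([exp.Literal.string("a%"), exp.column("x")])
assert like.sql() == "x LIKE 'a%'"

# build_var_map pairs up alternating key/value arguments
var_map = build_var_map([exp.Literal.string("k"), exp.Literal.number(1)])
assert isinstance(var_map, exp.VarMap)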
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
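A sketch of what the metaclass precomputes (assuming the sqlglot.trie helpers shown in the imports above): multi-word SHOW/SET keys are split on spaces and packed into a trie so the parser can match them token by token:

from sqlglot.trie import TrieResult, in_trie, new_trie

trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))

result, _ = in_trie(trie, ["SHOW"])            # TrieResult.PREFIX: more tokens needed
result, _ = in_trie(trie, ["SHOW", "TABLES"])  # TrieResult.EXISTS: a full key matched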
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
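Dialects typically extend this registry by subclassing Parser and merging extra builders into FUNCTIONS. A hedged sketch (the REVERSE_WORDS name is hypothetical):

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    FUNCTIONS = {
        **Parser.FUNCTIONS,
        # Hypothetical function: the builder receives the parsed argument list
        "REVERSE_WORDS": lambda args: exp.Anonymous(this="REVERSE_WORDS", expressions=args),
    }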
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }
    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
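These precedence tables are what make AND bind tighter than OR. A quick illustration:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("a OR b AND c")
assert isinstance(tree, exp.Or)              # OR is the root of the tree...
assert isinstance(tree.expression, exp.And)  # ...with b AND c grouped beneath it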
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
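Illustrative: COLUMN_OPERATORS turns postfix operators on a column into expression nodes, e.g. `::` becomes a Cast (strict by default, per STRICT_CAST below):

import sqlglot
from sqlglot import exp

cast = sqlglot.parse_one("SELECT x::TEXT FROM t").find(exp.Cast)
assert cast is not None and cast.sql() == "CAST(x AS TEXT)"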
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }
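Illustrative: the first token of each statement selects its parser from STATEMENT_PARSERS:

import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("USE db"), exp.Use)
assert isinstance(sqlglot.parse_one("COMMIT"), exp.Commit)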
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
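Illustrative: RANGE_PARSERS attach postfix predicates to an operand that has already been parsed:

import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("x BETWEEN 1 AND 10"), exp.Between)
assert isinstance(sqlglot.parse_one("x IN (1, 2)"), exp.In)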
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
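A sketch of PROPERTY_PARSERS in action, assuming ClickHouse DDL support behaves as in the clickhouse dialect:

import sqlglot
from sqlglot import exp

ddl = sqlglot.parse_one(
    "CREATE TABLE t (x Int32) ENGINE=MergeTree ORDER BY x", read="clickhouse"
)
assert ddl.find(exp.EngineProperty) is not None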
"REMOTE": lambda self: self._parse_remote_with_connection(), 1046 "RETURNS": lambda self: self._parse_returns(), 1047 "STRICT": lambda self: self.expression(exp.StrictProperty), 1048 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1049 "ROW": lambda self: self._parse_row(), 1050 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1051 "SAMPLE": lambda self: self.expression( 1052 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1053 ), 1054 "SECURE": lambda self: self.expression(exp.SecureProperty), 1055 "SECURITY": lambda self: self._parse_security(), 1056 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1057 "SETTINGS": lambda self: self._parse_settings_property(), 1058 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1059 "SORTKEY": lambda self: self._parse_sortkey(), 1060 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1061 "STABLE": lambda self: self.expression( 1062 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1063 ), 1064 "STORED": lambda self: self._parse_stored(), 1065 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1066 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1067 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1068 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1069 "TO": lambda self: self._parse_to_table(), 1070 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1071 "TRANSFORM": lambda self: self.expression( 1072 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1073 ), 1074 "TTL": lambda self: self._parse_ttl(), 1075 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1076 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1077 "VOLATILE": lambda self: self._parse_volatile_property(), 1078 "WITH": lambda self: self._parse_with_property(), 1079 } 1080 1081 CONSTRAINT_PARSERS = { 1082 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1083 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1084 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1085 "CHARACTER SET": lambda self: self.expression( 1086 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1087 ), 1088 "CHECK": lambda self: self.expression( 1089 exp.CheckColumnConstraint, 1090 this=self._parse_wrapped(self._parse_assignment), 1091 enforced=self._match_text_seq("ENFORCED"), 1092 ), 1093 "COLLATE": lambda self: self.expression( 1094 exp.CollateColumnConstraint, 1095 this=self._parse_identifier() or self._parse_column(), 1096 ), 1097 "COMMENT": lambda self: self.expression( 1098 exp.CommentColumnConstraint, this=self._parse_string() 1099 ), 1100 "COMPRESS": lambda self: self._parse_compress(), 1101 "CLUSTERED": lambda self: self.expression( 1102 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1103 ), 1104 "NONCLUSTERED": lambda self: self.expression( 1105 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "DEFAULT": lambda self: self.expression( 1108 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1109 ), 1110 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1111 "EPHEMERAL": lambda self: self.expression( 1112 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1113 ), 1114 
"EXCLUDE": lambda self: self.expression( 1115 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1116 ), 1117 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1118 "FORMAT": lambda self: self.expression( 1119 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1120 ), 1121 "GENERATED": lambda self: self._parse_generated_as_identity(), 1122 "IDENTITY": lambda self: self._parse_auto_increment(), 1123 "INLINE": lambda self: self._parse_inline(), 1124 "LIKE": lambda self: self._parse_create_like(), 1125 "NOT": lambda self: self._parse_not_constraint(), 1126 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1127 "ON": lambda self: ( 1128 self._match(TokenType.UPDATE) 1129 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1130 ) 1131 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1132 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1133 "PERIOD": lambda self: self._parse_period_for_system_time(), 1134 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1135 "REFERENCES": lambda self: self._parse_references(match=False), 1136 "TITLE": lambda self: self.expression( 1137 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1138 ), 1139 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1140 "UNIQUE": lambda self: self._parse_unique(), 1141 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1142 "WATERMARK": lambda self: self.expression( 1143 exp.WatermarkColumnConstraint, 1144 this=self._match(TokenType.FOR) and self._parse_column(), 1145 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1146 ), 1147 "WITH": lambda self: self.expression( 1148 exp.Properties, expressions=self._parse_wrapped_properties() 1149 ), 1150 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1151 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1152 } 1153 1154 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1155 if not self._match(TokenType.L_PAREN, advance=False): 1156 # Partitioning by bucket or truncate follows the syntax: 1157 # PARTITION BY (BUCKET(..) 
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
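Illustrative: after the ALTER <object> prefix is consumed, the next keyword picks an action parser from ALTER_PARSERS:

import sqlglot
from sqlglot import exp

alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
assert isinstance(alter, exp.Alter)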
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1248 "GAP_FILL": lambda self: self._parse_gap_fill(), 1249 "JSON_OBJECT": lambda self: self._parse_json_object(), 1250 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1251 "JSON_TABLE": lambda self: self._parse_json_table(), 1252 "MATCH": lambda self: self._parse_match_against(), 1253 "NORMALIZE": lambda self: self._parse_normalize(), 1254 "OPENJSON": lambda self: self._parse_open_json(), 1255 "OVERLAY": lambda self: self._parse_overlay(), 1256 "POSITION": lambda self: self._parse_position(), 1257 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1258 "STRING_AGG": lambda self: self._parse_string_agg(), 1259 "SUBSTRING": lambda self: self._parse_substring(), 1260 "TRIM": lambda self: self._parse_trim(), 1261 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1262 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1263 "XMLELEMENT": lambda self: self.expression( 1264 exp.XMLElement, 1265 this=self._match_text_seq("NAME") and self._parse_id_var(), 1266 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1267 ), 1268 "XMLTABLE": lambda self: self._parse_xml_table(), 1269 } 1270 1271 QUERY_MODIFIER_PARSERS = { 1272 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1273 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1274 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1275 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1276 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1277 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1278 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1279 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1280 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1281 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1282 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1283 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1284 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1285 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1286 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1287 TokenType.CLUSTER_BY: lambda self: ( 1288 "cluster", 1289 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1290 ), 1291 TokenType.DISTRIBUTE_BY: lambda self: ( 1292 "distribute", 1293 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1294 ), 1295 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1296 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1297 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1298 } 1299 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1300 1301 SET_PARSERS = { 1302 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1303 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1304 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1305 "TRANSACTION": lambda self: self._parse_set_transaction(), 1306 } 1307 1308 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1309 1310 TYPE_LITERAL_PARSERS = { 1311 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1312 } 1313 1314 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
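Illustrative: driving a Parser directly; most callers go through sqlglot.parse/parse_one, which wire the tokenizer and dialect together:

from sqlglot.dialects import Dialect

dialect = Dialect.get_or_raise("duckdb")
sql = "SELECT 1; SELECT 2"
expressions = dialect.parser().parse(dialect.tokenize(sql), sql)
assert len(expressions) == 2  # one tree per semicolon-separated statement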
1628    def parse_into(
1629        self,
1630        expression_types: exp.IntoType,
1631        raw_tokens: t.List[Token],
1632        sql: t.Optional[str] = None,
1633    ) -> t.List[t.Optional[exp.Expression]]:
1634        """
1635        Parses a list of tokens into a given Expression type. If a collection of Expression
1636        types is given instead, this method will try to parse the token list into each one
1637        of them, stopping at the first for which the parsing succeeds.

1639        Args:
1640            expression_types: The expression type(s) to try and parse the token list into.
1641            raw_tokens: The list of tokens.
1642            sql: The original SQL string, used to produce helpful debug messages.
1643
1644        Returns:
1645            The target Expression.
1646        """
1647        errors = []
1648        for expression_type in ensure_list(expression_types):
1649            parser = self.EXPRESSION_PARSERS.get(expression_type)
1650            if not parser:
1651                raise TypeError(f"No parser registered for {expression_type}")
1652
1653            try:
1654                return self._parse(parser, raw_tokens, sql)
1655            except ParseError as e:
1656                e.errors[0]["into_expression"] = expression_type
1657                errors.append(e)
1658
1659        raise ParseError(
1660            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
1661            errors=merge_errors(errors),
1662        ) from errors[-1]
1663
1664    def _parse(
1665        self,
1666        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
1667        raw_tokens: t.List[Token],
1668        sql: t.Optional[str] = None,
1669    ) -> t.List[t.Optional[exp.Expression]]:
1670        self.reset()
1671        self.sql = sql or ""
1672
1673        total = len(raw_tokens)
1674        chunks: t.List[t.List[Token]] = [[]]
1675
1676        for i, token in enumerate(raw_tokens):
1677            if token.token_type == TokenType.SEMICOLON:
1678                if token.comments:
1679                    chunks.append([token])
1680
1681                if i < total - 1:
1682                    chunks.append([])
1683            else:
1684                chunks[-1].append(token)
1685
1686        expressions = []
1687
1688        for tokens in chunks:
1689            self._index = -1
1690            self._tokens = tokens
1691            self._advance()
1692
1693            expressions.append(parse_method(self))
1694
1695            if self._index < len(self._tokens):
1696                self.raise_error("Invalid expression / Unexpected token")
1697
1698            self.check_errors()
1699
1700        return expressions
1701
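In practice, parse_into is usually reached through the top-level helper sqlglot.parse_one(..., into=...). Illustrative sketch (assumes exp.Table is registered in EXPRESSION_PARSERS, as it is for the base parser):

    import sqlglot
    from sqlglot import exp

    # Parse the string specifically as a table reference rather than a statement
    table = sqlglot.parse_one("db.tbl", into=exp.Table)
    assert isinstance(table, exp.Table)
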
1702    def check_errors(self) -> None:
1703        """Logs or raises any found errors, depending on the chosen error level setting."""
1704        if self.error_level == ErrorLevel.WARN:
1705            for error in self.errors:
1706                logger.error(str(error))
1707        elif self.error_level == ErrorLevel.RAISE and self.errors:
1708            raise ParseError(
1709                concat_messages(self.errors, self.max_errors),
1710                errors=merge_errors(self.errors),
1711            )
1712
1713    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
1714        """
1715        Appends an error to the list of recorded errors or raises it, depending on the chosen
1716        error level setting.
1717        """
1718        token = token or self._curr or self._prev or Token.string("")
1719        start = token.start
1720        end = token.end + 1
1721        start_context = self.sql[max(start - self.error_message_context, 0) : start]
1722        highlight = self.sql[start:end]
1723        end_context = self.sql[end : end + self.error_message_context]
1724
1725        error = ParseError.new(
1726            f"{message}. Line {token.line}, Col: {token.col}.\n"
1727            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
1728            description=message,
1729            line=token.line,
1730            col=token.col,
1731            start_context=start_context,
1732            highlight=highlight,
1733            end_context=end_context,
1734        )
1735
1736        if self.error_level == ErrorLevel.IMMEDIATE:
1737            raise error
1738
1739        self.errors.append(error)
1740
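The three error levels interact as follows: IMMEDIATE raises at the offending token, RAISE collects errors and raises them together in check_errors, and WARN only logs them. Illustrative example (not part of the module):

    import sqlglot
    from sqlglot.errors import ErrorLevel, ParseError

    try:
        sqlglot.parse_one("SELECT foo( FROM bar", error_level=ErrorLevel.RAISE)
    except ParseError as e:
        # The structured details mirror the fields populated by raise_error above
        print(e.errors[0]["line"], e.errors[0]["description"])
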
1741    def expression(
1742        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
1743    ) -> E:
1744        """
1745        Creates a new, validated Expression.
1746
1747        Args:
1748            exp_class: The expression class to instantiate.
1749            comments: An optional list of comments to attach to the expression.
1750            kwargs: The arguments to set for the expression along with their respective values.
1751
1752        Returns:
1753            The target expression.
1754        """
1755        instance = exp_class(**kwargs)
1756        instance.add_comments(comments) if comments else self._add_comments(instance)
1757        return self.validate_expression(instance)
1758
1759    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
1760        if expression and self._prev_comments:
1761            expression.add_comments(self._prev_comments)
1762            self._prev_comments = None
1763
1764    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
1765        """
1766        Validates an Expression, making sure that all its mandatory arguments are set.
1767
1768        Args:
1769            expression: The expression to validate.
1770            args: An optional list of items that was used to instantiate the expression, if it's a Func.
1771
1772        Returns:
1773            The validated expression.
1774        """
1775        if self.error_level != ErrorLevel.IGNORE:
1776            for error_message in expression.error_messages(args):
1777                self.raise_error(error_message)
1778
1779        return expression
1780
1781    def _find_sql(self, start: Token, end: Token) -> str:
1782        return self.sql[start.start : end.end + 1]
1783
1784    def _is_connected(self) -> bool:
1785        return self._prev and self._curr and self._prev.end + 1 == self._curr.start
1786
1787    def _advance(self, times: int = 1) -> None:
1788        self._index += times
1789        self._curr = seq_get(self._tokens, self._index)
1790        self._next = seq_get(self._tokens, self._index + 1)
1791
1792        if self._index > 0:
1793            self._prev = self._tokens[self._index - 1]
1794            self._prev_comments = self._prev.comments
1795        else:
1796            self._prev = None
1797            self._prev_comments = None
1798
1799    def _retreat(self, index: int) -> None:
1800        if index != self._index:
1801            self._advance(index - self._index)
1802
1803    def _warn_unsupported(self) -> None:
1804        if len(self._tokens) <= 1:
1805            return
1806
1807        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
1808        # interested in emitting a warning for the one being currently processed.
1809        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]
1810
1811        logger.warning(
1812            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1813        )
1814
1815    def _parse_command(self) -> exp.Command:
1816        self._warn_unsupported()
1817        return self.expression(
1818            exp.Command,
1819            comments=self._prev_comments,
1820            this=self._prev.text.upper(),
1821            expression=self._parse_string(),
1822        )
1823
1824    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
1825        """
1826        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1827        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1828        solve this by setting & resetting the parser state accordingly.
1829        """
1830        index = self._index
1831        error_level = self.error_level
1832
1833        self.error_level = ErrorLevel.IMMEDIATE
1834        try:
1835            this = parse_method()
1836        except ParseError:
1837            this = None
1838        finally:
1839            if not this or retreat:
1840                self._retreat(index)
1841            self.error_level = error_level
1842
1843        return this
1844
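Illustrative sketch of the speculative-parsing pattern _try_parse enables (hypothetical subclass and method, for illustration only):

    from sqlglot import parser

    class MyParser(parser.Parser):
        def _parse_optional_id_list(self):
            # Speculatively parse "(a, b, c)"; on ParseError the token position
            # is restored by _try_parse and we fall back to an empty list.
            return self._try_parse(self._parse_wrapped_id_vars) or []
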
1845    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1846        start = self._prev
1847        exists = self._parse_exists() if allow_exists else None
1848
1849        self._match(TokenType.ON)
1850
1851        materialized = self._match_text_seq("MATERIALIZED")
1852        kind = self._match_set(self.CREATABLES) and self._prev
1853        if not kind:
1854            return self._parse_as_command(start)
1855
1856        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1857            this = self._parse_user_defined_function(kind=kind.token_type)
1858        elif kind.token_type == TokenType.TABLE:
1859            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1860        elif kind.token_type == TokenType.COLUMN:
1861            this = self._parse_column()
1862        else:
1863            this = self._parse_id_var()
1864
1865        self._match(TokenType.IS)
1866
1867        return self.expression(
1868            exp.Comment,
1869            this=this,
1870            kind=kind.text,
1871            expression=self._parse_string(),
1872            exists=exists,
1873            materialized=materialized,
1874        )
1875
1876    def _parse_to_table(
1877        self,
1878    ) -> exp.ToTableProperty:
1879        table = self._parse_table_parts(schema=True)
1880        return self.expression(exp.ToTableProperty, this=table)
1881
1882    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1883    def _parse_ttl(self) -> exp.Expression:
1884        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1885            this = self._parse_bitwise()
1886
1887            if self._match_text_seq("DELETE"):
1888                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1889            if self._match_text_seq("RECOMPRESS"):
1890                return self.expression(
1891                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1892                )
1893            if self._match_text_seq("TO", "DISK"):
1894                return self.expression(
1895                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1896                )
1897            if self._match_text_seq("TO", "VOLUME"):
1898                return self.expression(
1899                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1900                )
1901
1902            return this
1903
1904        expressions = self._parse_csv(_parse_ttl_action)
1905        where = self._parse_where()
1906        group = self._parse_group()
1907
1908        aggregates = None
1909        if group and self._match(TokenType.SET):
1910            aggregates = self._parse_csv(self._parse_set_item)
1911
1912        return self.expression(
1913            exp.MergeTreeTTL,
1914            expressions=expressions,
1915            where=where,
1916            group=group,
1917            aggregates=aggregates,
1918        )
1919
1920    def _parse_statement(self) -> t.Optional[exp.Expression]:
1921        if self._curr is None:
1922            return None
1923
1924        if self._match_set(self.STATEMENT_PARSERS):
1925            comments = self._prev_comments
1926            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1927            stmt.add_comments(comments, prepend=True)
1928            return stmt
1929
1930        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
1931            return self._parse_command()
1932
1933        expression = self._parse_expression()
1934        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1935        return self._parse_query_modifiers(expression)
1936
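Statement dispatch is table-driven: _parse_statement looks up the current token in STATEMENT_PARSERS. A subclass can register additional entries; a sketch (the token choice is illustrative, and the base parser already routes DESCRIBE this way):

    from sqlglot import parser
    from sqlglot.tokens import TokenType

    class MyParser(parser.Parser):
        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.DESCRIBE: lambda self: self._parse_describe(),
        }
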
1937    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
1938        start = self._prev
1939        temporary = self._match(TokenType.TEMPORARY)
1940        materialized = self._match_text_seq("MATERIALIZED")
1941
1942        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
1943        if not kind:
1944            return self._parse_as_command(start)
1945
1946        concurrently = self._match_text_seq("CONCURRENTLY")
1947        if_exists = exists or self._parse_exists()
1948
1949        if kind == "COLUMN":
1950            this = self._parse_column()
1951        else:
1952            this = self._parse_table_parts(
1953                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
1954            )
1955
1956        cluster = self._parse_on_property() if self._match(TokenType.ON) else None
1957
1958        if self._match(TokenType.L_PAREN, advance=False):
1959            expressions = self._parse_wrapped_csv(self._parse_types)
1960        else:
1961            expressions = None
1962
1963        return self.expression(
1964            exp.Drop,
1965            exists=if_exists,
1966            this=this,
1967            expressions=expressions,
1968            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
1969            temporary=temporary,
1970            materialized=materialized,
1971            cascade=self._match_text_seq("CASCADE"),
1972            constraints=self._match_text_seq("CONSTRAINTS"),
1973            purge=self._match_text_seq("PURGE"),
1974            cluster=cluster,
1975            concurrently=concurrently,
1976        )
1977
1978    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1979        return (
1980            self._match_text_seq("IF")
1981            and (not not_ or self._match(TokenType.NOT))
1982            and self._match(TokenType.EXISTS)
1983        )
1984
1985    def _parse_create(self) -> exp.Create | exp.Command:
1986        # Note: this can't be None because we've matched a statement parser
1987        start = self._prev
1988
1989        replace = (
1990            start.token_type == TokenType.REPLACE
1991            or self._match_pair(TokenType.OR, TokenType.REPLACE)
1992            or self._match_pair(TokenType.OR, TokenType.ALTER)
1993        )
1994        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)
1995
1996        unique = self._match(TokenType.UNIQUE)
1997
1998        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
1999            clustered = True
2000        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
2001            "COLUMNSTORE"
2002        ):
2003            clustered = False
2004        else:
2005            clustered = None
2006
2007        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
2008            self._advance()
2009
2010        properties = None
2011        create_token = self._match_set(self.CREATABLES) and self._prev
2012
2013        if not create_token:
2014            # exp.Properties.Location.POST_CREATE
2015            properties = self._parse_properties()
2016            create_token = self._match_set(self.CREATABLES) and self._prev
2017
2018            if not properties or not create_token:
2019                return self._parse_as_command(start)
2020
2021        concurrently = self._match_text_seq("CONCURRENTLY")
2022        exists = self._parse_exists(not_=True)
2023        this = None
2024        expression: t.Optional[exp.Expression] = None
2025        indexes = None
2026        no_schema_binding = None
2027        begin = None
2028        end = None
2029        clone = None
2030
2031        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
2032            nonlocal properties
2033            if properties and temp_props:
2034                properties.expressions.extend(temp_props.expressions)
2035            elif temp_props:
2036                properties = temp_props
2037
2038        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
2039            this = self._parse_user_defined_function(kind=create_token.token_type)
2040
2041            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
2042            extend_props(self._parse_properties())
2043
2044            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
2045            extend_props(self._parse_properties())
2046
2047            if not expression:
2048                if self._match(TokenType.COMMAND):
2049                    expression = self._parse_as_command(self._prev)
2050                else:
2051                    begin = self._match(TokenType.BEGIN)
2052                    return_ = self._match_text_seq("RETURN")
2053
2054                    if self._match(TokenType.STRING, advance=False):
2055                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
2056                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
2057                        expression = self._parse_string()
2058                        extend_props(self._parse_properties())
2059                    else:
2060                        expression = self._parse_user_defined_function_expression()
2061
2062                    end = self._match_text_seq("END")
2063
2064                    if return_:
2065                        expression = self.expression(exp.Return, this=expression)
2066        elif create_token.token_type == TokenType.INDEX:
2067            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
2068            if not self._match(TokenType.ON):
2069                index = self._parse_id_var()
2070                anonymous = False
2071            else:
2072                index = None
2073                anonymous = True
2074
2075            this = self._parse_index(index=index, anonymous=anonymous)
2076        elif create_token.token_type in self.DB_CREATABLES:
2077            table_parts = self._parse_table_parts(
2078                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
2079            )
2080
2081            # exp.Properties.Location.POST_NAME
2082            self._match(TokenType.COMMA)
2083            extend_props(self._parse_properties(before=True))
2084
2085            this = self._parse_schema(this=table_parts)
2086
2087            # exp.Properties.Location.POST_SCHEMA and POST_WITH
2088            extend_props(self._parse_properties())
2089
2090            has_alias = self._match(TokenType.ALIAS)
2091            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
2092                # exp.Properties.Location.POST_ALIAS
2093                extend_props(self._parse_properties())
2094
2095            if create_token.token_type == TokenType.SEQUENCE:
2096                expression = self._parse_types()
2097                props = self._parse_properties()
2098                if props:
2099                    sequence_props = exp.SequenceProperties()
2100                    options = []
2101                    for prop in props:
2102                        if isinstance(prop, exp.SequenceProperties):
2103                            for arg, value in prop.args.items():
2104                                if arg == "options":
2105                                    options.extend(value)
2106                                else:
2107                                    sequence_props.set(arg, value)
2108                            prop.pop()
2109
2110                    if options:
2111                        sequence_props.set("options", options)
2112
2113                    props.append("expressions", sequence_props)
2114                    extend_props(props)
2115            else:
2116                expression = self._parse_ddl_select()
2117
2118            # Some dialects also support using a table as an alias instead of a SELECT.
2119            # Here we fall back to this as an alternative.
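Illustrative example of what _parse_create produces (not part of the module); the table-as-alias fallback mentioned in the comment above is handled by the branch that follows:

    import sqlglot

    create = sqlglot.parse_one("CREATE TABLE t AS SELECT 1 AS a")
    print(create.args["kind"])      # TABLE
    print(create.expression.sql())  # SELECT 1 AS a
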
2120 if not expression and has_alias: 2121 expression = self._try_parse(self._parse_table_parts) 2122 2123 if create_token.token_type == TokenType.TABLE: 2124 # exp.Properties.Location.POST_EXPRESSION 2125 extend_props(self._parse_properties()) 2126 2127 indexes = [] 2128 while True: 2129 index = self._parse_index() 2130 2131 # exp.Properties.Location.POST_INDEX 2132 extend_props(self._parse_properties()) 2133 if not index: 2134 break 2135 else: 2136 self._match(TokenType.COMMA) 2137 indexes.append(index) 2138 elif create_token.token_type == TokenType.VIEW: 2139 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2140 no_schema_binding = True 2141 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2142 extend_props(self._parse_properties()) 2143 2144 shallow = self._match_text_seq("SHALLOW") 2145 2146 if self._match_texts(self.CLONE_KEYWORDS): 2147 copy = self._prev.text.lower() == "copy" 2148 clone = self.expression( 2149 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2150 ) 2151 2152 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2153 return self._parse_as_command(start) 2154 2155 create_kind_text = create_token.text.upper() 2156 return self.expression( 2157 exp.Create, 2158 this=this, 2159 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2160 replace=replace, 2161 refresh=refresh, 2162 unique=unique, 2163 expression=expression, 2164 exists=exists, 2165 properties=properties, 2166 indexes=indexes, 2167 no_schema_binding=no_schema_binding, 2168 begin=begin, 2169 end=end, 2170 clone=clone, 2171 concurrently=concurrently, 2172 clustered=clustered, 2173 ) 2174 2175 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2176 seq = exp.SequenceProperties() 2177 2178 options = [] 2179 index = self._index 2180 2181 while self._curr: 2182 self._match(TokenType.COMMA) 2183 if self._match_text_seq("INCREMENT"): 2184 self._match_text_seq("BY") 2185 self._match_text_seq("=") 2186 seq.set("increment", self._parse_term()) 2187 elif self._match_text_seq("MINVALUE"): 2188 seq.set("minvalue", self._parse_term()) 2189 elif self._match_text_seq("MAXVALUE"): 2190 seq.set("maxvalue", self._parse_term()) 2191 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2192 self._match_text_seq("=") 2193 seq.set("start", self._parse_term()) 2194 elif self._match_text_seq("CACHE"): 2195 # T-SQL allows empty CACHE which is initialized dynamically 2196 seq.set("cache", self._parse_number() or True) 2197 elif self._match_text_seq("OWNED", "BY"): 2198 # "OWNED BY NONE" is the default 2199 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2200 else: 2201 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2202 if opt: 2203 options.append(opt) 2204 else: 2205 break 2206 2207 seq.set("options", options if options else None) 2208 return None if self._index == index else seq 2209 2210 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2211 # only used for teradata currently 2212 self._match(TokenType.COMMA) 2213 2214 kwargs = { 2215 "no": self._match_text_seq("NO"), 2216 "dual": self._match_text_seq("DUAL"), 2217 "before": self._match_text_seq("BEFORE"), 2218 "default": self._match_text_seq("DEFAULT"), 2219 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2220 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2221 "after": self._match_text_seq("AFTER"), 2222 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2223 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2224 } 2225 2226 if self._match_texts(self.PROPERTY_PARSERS): 2227 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2228 try: 2229 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2230 except TypeError: 2231 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2232 2233 return None 2234 2235 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2236 return self._parse_wrapped_csv(self._parse_property) 2237 2238 def _parse_property(self) -> t.Optional[exp.Expression]: 2239 if self._match_texts(self.PROPERTY_PARSERS): 2240 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2241 2242 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2243 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2244 2245 if self._match_text_seq("COMPOUND", "SORTKEY"): 2246 return self._parse_sortkey(compound=True) 2247 2248 if self._match_text_seq("SQL", "SECURITY"): 2249 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2250 2251 index = self._index 2252 2253 seq_props = self._parse_sequence_properties() 2254 if seq_props: 2255 return seq_props 2256 2257 self._retreat(index) 2258 key = self._parse_column() 2259 2260 if not self._match(TokenType.EQ): 2261 self._retreat(index) 2262 return None 2263 2264 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2265 if isinstance(key, exp.Column): 2266 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2267 2268 value = self._parse_bitwise() or self._parse_var(any_token=True) 2269 2270 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2271 if isinstance(value, exp.Column): 2272 value = exp.var(value.name) 2273 2274 return self.expression(exp.Property, this=key, value=value) 2275 2276 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2277 if self._match_text_seq("BY"): 2278 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2279 2280 self._match(TokenType.ALIAS) 2281 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2282 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2283 2284 return self.expression( 2285 exp.FileFormatProperty, 2286 this=( 2287 self.expression( 2288 exp.InputOutputFormat, 2289 input_format=input_format, 2290 output_format=output_format, 2291 ) 2292 if input_format or output_format 2293 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2294 ), 2295 hive_format=True, 2296 ) 2297 2298 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2299 field = self._parse_field() 2300 if isinstance(field, exp.Identifier) and not field.quoted: 2301 field = exp.var(field) 2302 2303 return field 2304 2305 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2306 self._match(TokenType.EQ) 2307 self._match(TokenType.ALIAS) 2308 2309 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2310 2311 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2312 properties = [] 2313 while True: 2314 if before: 2315 prop = self._parse_property_before() 2316 else: 2317 prop = self._parse_property() 2318 if not prop: 2319 break 2320 for p in ensure_list(prop): 2321 properties.append(p) 
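Illustrative aside (not part of the module): the property loop above is what turns dialect-specific DDL options into an exp.Properties node attached to the statement. This sketch assumes the Trino reader, where CREATE TABLE options appear in a WITH clause:

    import sqlglot
    from sqlglot import exp

    ddl = sqlglot.parse_one("CREATE TABLE t (a INT) WITH (format='PARQUET')", read="trino")
    assert isinstance(ddl.args["properties"], exp.Properties)
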
2322 2323 if properties: 2324 return self.expression(exp.Properties, expressions=properties) 2325 2326 return None 2327 2328 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2329 return self.expression( 2330 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2331 ) 2332 2333 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2334 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2335 security_specifier = self._prev.text.upper() 2336 return self.expression(exp.SecurityProperty, this=security_specifier) 2337 return None 2338 2339 def _parse_settings_property(self) -> exp.SettingsProperty: 2340 return self.expression( 2341 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2342 ) 2343 2344 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2345 if self._index >= 2: 2346 pre_volatile_token = self._tokens[self._index - 2] 2347 else: 2348 pre_volatile_token = None 2349 2350 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2351 return exp.VolatileProperty() 2352 2353 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2354 2355 def _parse_retention_period(self) -> exp.Var: 2356 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2357 number = self._parse_number() 2358 number_str = f"{number} " if number else "" 2359 unit = self._parse_var(any_token=True) 2360 return exp.var(f"{number_str}{unit}") 2361 2362 def _parse_system_versioning_property( 2363 self, with_: bool = False 2364 ) -> exp.WithSystemVersioningProperty: 2365 self._match(TokenType.EQ) 2366 prop = self.expression( 2367 exp.WithSystemVersioningProperty, 2368 **{ # type: ignore 2369 "on": True, 2370 "with": with_, 2371 }, 2372 ) 2373 2374 if self._match_text_seq("OFF"): 2375 prop.set("on", False) 2376 return prop 2377 2378 self._match(TokenType.ON) 2379 if self._match(TokenType.L_PAREN): 2380 while self._curr and not self._match(TokenType.R_PAREN): 2381 if self._match_text_seq("HISTORY_TABLE", "="): 2382 prop.set("this", self._parse_table_parts()) 2383 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2384 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2385 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2386 prop.set("retention_period", self._parse_retention_period()) 2387 2388 self._match(TokenType.COMMA) 2389 2390 return prop 2391 2392 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2393 self._match(TokenType.EQ) 2394 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2395 prop = self.expression(exp.DataDeletionProperty, on=on) 2396 2397 if self._match(TokenType.L_PAREN): 2398 while self._curr and not self._match(TokenType.R_PAREN): 2399 if self._match_text_seq("FILTER_COLUMN", "="): 2400 prop.set("filter_column", self._parse_column()) 2401 elif self._match_text_seq("RETENTION_PERIOD", "="): 2402 prop.set("retention_period", self._parse_retention_period()) 2403 2404 self._match(TokenType.COMMA) 2405 2406 return prop 2407 2408 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2409 kind = "HASH" 2410 expressions: t.Optional[t.List[exp.Expression]] = None 2411 if self._match_text_seq("BY", "HASH"): 2412 expressions = self._parse_wrapped_csv(self._parse_id_var) 2413 elif self._match_text_seq("BY", "RANDOM"): 2414 kind = "RANDOM" 2415 2416 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2417 
buckets: t.Optional[exp.Expression] = None 2418 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2419 buckets = self._parse_number() 2420 2421 return self.expression( 2422 exp.DistributedByProperty, 2423 expressions=expressions, 2424 kind=kind, 2425 buckets=buckets, 2426 order=self._parse_order(), 2427 ) 2428 2429 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2430 self._match_text_seq("KEY") 2431 expressions = self._parse_wrapped_id_vars() 2432 return self.expression(expr_type, expressions=expressions) 2433 2434 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2435 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2436 prop = self._parse_system_versioning_property(with_=True) 2437 self._match_r_paren() 2438 return prop 2439 2440 if self._match(TokenType.L_PAREN, advance=False): 2441 return self._parse_wrapped_properties() 2442 2443 if self._match_text_seq("JOURNAL"): 2444 return self._parse_withjournaltable() 2445 2446 if self._match_texts(self.VIEW_ATTRIBUTES): 2447 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2448 2449 if self._match_text_seq("DATA"): 2450 return self._parse_withdata(no=False) 2451 elif self._match_text_seq("NO", "DATA"): 2452 return self._parse_withdata(no=True) 2453 2454 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2455 return self._parse_serde_properties(with_=True) 2456 2457 if self._match(TokenType.SCHEMA): 2458 return self.expression( 2459 exp.WithSchemaBindingProperty, 2460 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2461 ) 2462 2463 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2464 return self.expression( 2465 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2466 ) 2467 2468 if not self._next: 2469 return None 2470 2471 return self._parse_withisolatedloading() 2472 2473 def _parse_procedure_option(self) -> exp.Expression | None: 2474 if self._match_text_seq("EXECUTE", "AS"): 2475 return self.expression( 2476 exp.ExecuteAsProperty, 2477 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2478 or self._parse_string(), 2479 ) 2480 2481 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2482 2483 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2484 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2485 self._match(TokenType.EQ) 2486 2487 user = self._parse_id_var() 2488 self._match(TokenType.PARAMETER) 2489 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2490 2491 if not user or not host: 2492 return None 2493 2494 return exp.DefinerProperty(this=f"{user}@{host}") 2495 2496 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2497 self._match(TokenType.TABLE) 2498 self._match(TokenType.EQ) 2499 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2500 2501 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2502 return self.expression(exp.LogProperty, no=no) 2503 2504 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2505 return self.expression(exp.JournalProperty, **kwargs) 2506 2507 def _parse_checksum(self) -> exp.ChecksumProperty: 2508 self._match(TokenType.EQ) 2509 2510 on = None 2511 if self._match(TokenType.ON): 2512 on = True 2513 elif self._match_text_seq("OFF"): 2514 on = False 2515 2516 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2517 2518 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2519 return self.expression( 2520 exp.Cluster, 2521 expressions=( 2522 self._parse_wrapped_csv(self._parse_ordered) 2523 if wrapped 2524 else self._parse_csv(self._parse_ordered) 2525 ), 2526 ) 2527 2528 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2529 self._match_text_seq("BY") 2530 2531 self._match_l_paren() 2532 expressions = self._parse_csv(self._parse_column) 2533 self._match_r_paren() 2534 2535 if self._match_text_seq("SORTED", "BY"): 2536 self._match_l_paren() 2537 sorted_by = self._parse_csv(self._parse_ordered) 2538 self._match_r_paren() 2539 else: 2540 sorted_by = None 2541 2542 self._match(TokenType.INTO) 2543 buckets = self._parse_number() 2544 self._match_text_seq("BUCKETS") 2545 2546 return self.expression( 2547 exp.ClusteredByProperty, 2548 expressions=expressions, 2549 sorted_by=sorted_by, 2550 buckets=buckets, 2551 ) 2552 2553 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2554 if not self._match_text_seq("GRANTS"): 2555 self._retreat(self._index - 1) 2556 return None 2557 2558 return self.expression(exp.CopyGrantsProperty) 2559 2560 def _parse_freespace(self) -> exp.FreespaceProperty: 2561 self._match(TokenType.EQ) 2562 return self.expression( 2563 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2564 ) 2565 2566 def _parse_mergeblockratio( 2567 self, no: bool = False, default: bool = False 2568 ) -> exp.MergeBlockRatioProperty: 2569 if self._match(TokenType.EQ): 2570 return self.expression( 2571 exp.MergeBlockRatioProperty, 2572 this=self._parse_number(), 2573 percent=self._match(TokenType.PERCENT), 2574 ) 2575 2576 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2577 2578 def _parse_datablocksize( 2579 self, 2580 default: t.Optional[bool] = None, 2581 minimum: t.Optional[bool] = None, 2582 maximum: t.Optional[bool] = None, 2583 ) -> exp.DataBlocksizeProperty: 2584 self._match(TokenType.EQ) 2585 size = self._parse_number() 2586 2587 units = None 2588 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2589 units = self._prev.text 2590 2591 return self.expression( 2592 exp.DataBlocksizeProperty, 2593 size=size, 2594 units=units, 2595 default=default, 2596 minimum=minimum, 2597 maximum=maximum, 2598 ) 2599 2600 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2601 self._match(TokenType.EQ) 2602 always = self._match_text_seq("ALWAYS") 2603 manual = self._match_text_seq("MANUAL") 2604 never = self._match_text_seq("NEVER") 2605 default = self._match_text_seq("DEFAULT") 2606 2607 autotemp = None 2608 if self._match_text_seq("AUTOTEMP"): 2609 autotemp = self._parse_schema() 2610 2611 return self.expression( 2612 exp.BlockCompressionProperty, 2613 always=always, 2614 manual=manual, 2615 never=never, 2616 default=default, 2617 autotemp=autotemp, 2618 ) 2619 2620 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2621 index = self._index 2622 no = self._match_text_seq("NO") 2623 concurrent = self._match_text_seq("CONCURRENT") 2624 2625 if not self._match_text_seq("ISOLATED", "LOADING"): 2626 self._retreat(index) 2627 return None 2628 2629 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2630 return self.expression( 2631 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2632 ) 2633 2634 def _parse_locking(self) -> exp.LockingProperty: 2635 if self._match(TokenType.TABLE): 2636 kind = "TABLE" 2637 elif 
self._match(TokenType.VIEW): 2638 kind = "VIEW" 2639 elif self._match(TokenType.ROW): 2640 kind = "ROW" 2641 elif self._match_text_seq("DATABASE"): 2642 kind = "DATABASE" 2643 else: 2644 kind = None 2645 2646 if kind in ("DATABASE", "TABLE", "VIEW"): 2647 this = self._parse_table_parts() 2648 else: 2649 this = None 2650 2651 if self._match(TokenType.FOR): 2652 for_or_in = "FOR" 2653 elif self._match(TokenType.IN): 2654 for_or_in = "IN" 2655 else: 2656 for_or_in = None 2657 2658 if self._match_text_seq("ACCESS"): 2659 lock_type = "ACCESS" 2660 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2661 lock_type = "EXCLUSIVE" 2662 elif self._match_text_seq("SHARE"): 2663 lock_type = "SHARE" 2664 elif self._match_text_seq("READ"): 2665 lock_type = "READ" 2666 elif self._match_text_seq("WRITE"): 2667 lock_type = "WRITE" 2668 elif self._match_text_seq("CHECKSUM"): 2669 lock_type = "CHECKSUM" 2670 else: 2671 lock_type = None 2672 2673 override = self._match_text_seq("OVERRIDE") 2674 2675 return self.expression( 2676 exp.LockingProperty, 2677 this=this, 2678 kind=kind, 2679 for_or_in=for_or_in, 2680 lock_type=lock_type, 2681 override=override, 2682 ) 2683 2684 def _parse_partition_by(self) -> t.List[exp.Expression]: 2685 if self._match(TokenType.PARTITION_BY): 2686 return self._parse_csv(self._parse_assignment) 2687 return [] 2688 2689 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2690 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2691 if self._match_text_seq("MINVALUE"): 2692 return exp.var("MINVALUE") 2693 if self._match_text_seq("MAXVALUE"): 2694 return exp.var("MAXVALUE") 2695 return self._parse_bitwise() 2696 2697 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2698 expression = None 2699 from_expressions = None 2700 to_expressions = None 2701 2702 if self._match(TokenType.IN): 2703 this = self._parse_wrapped_csv(self._parse_bitwise) 2704 elif self._match(TokenType.FROM): 2705 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2706 self._match_text_seq("TO") 2707 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2708 elif self._match_text_seq("WITH", "(", "MODULUS"): 2709 this = self._parse_number() 2710 self._match_text_seq(",", "REMAINDER") 2711 expression = self._parse_number() 2712 self._match_r_paren() 2713 else: 2714 self.raise_error("Failed to parse partition bound spec.") 2715 2716 return self.expression( 2717 exp.PartitionBoundSpec, 2718 this=this, 2719 expression=expression, 2720 from_expressions=from_expressions, 2721 to_expressions=to_expressions, 2722 ) 2723 2724 # https://www.postgresql.org/docs/current/sql-createtable.html 2725 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2726 if not self._match_text_seq("OF"): 2727 self._retreat(self._index - 1) 2728 return None 2729 2730 this = self._parse_table(schema=True) 2731 2732 if self._match(TokenType.DEFAULT): 2733 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2734 elif self._match_text_seq("FOR", "VALUES"): 2735 expression = self._parse_partition_bound_spec() 2736 else: 2737 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2738 2739 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2740 2741 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2742 self._match(TokenType.EQ) 2743 return self.expression( 2744 exp.PartitionedByProperty, 2745 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2746 ) 2747 2748 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2749 if self._match_text_seq("AND", "STATISTICS"): 2750 statistics = True 2751 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2752 statistics = False 2753 else: 2754 statistics = None 2755 2756 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2757 2758 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2759 if self._match_text_seq("SQL"): 2760 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2761 return None 2762 2763 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2764 if self._match_text_seq("SQL", "DATA"): 2765 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2766 return None 2767 2768 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2769 if self._match_text_seq("PRIMARY", "INDEX"): 2770 return exp.NoPrimaryIndexProperty() 2771 if self._match_text_seq("SQL"): 2772 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2773 return None 2774 2775 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2776 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2777 return exp.OnCommitProperty() 2778 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2779 return exp.OnCommitProperty(delete=True) 2780 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2781 2782 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2783 if self._match_text_seq("SQL", "DATA"): 2784 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2785 return None 2786 2787 def _parse_distkey(self) -> exp.DistKeyProperty: 2788 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2789 2790 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2791 table = self._parse_table(schema=True) 2792 2793 options = [] 2794 while self._match_texts(("INCLUDING", "EXCLUDING")): 2795 this = self._prev.text.upper() 2796 2797 id_var = self._parse_id_var() 2798 if not id_var: 2799 return None 2800 2801 options.append( 2802 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2803 ) 2804 2805 return self.expression(exp.LikeProperty, this=table, expressions=options) 2806 2807 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2808 return self.expression( 2809 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2810 ) 2811 2812 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2813 self._match(TokenType.EQ) 2814 return self.expression( 2815 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2816 ) 2817 2818 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2819 self._match_text_seq("WITH", "CONNECTION") 2820 return self.expression( 2821 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2822 ) 2823 2824 def _parse_returns(self) -> exp.ReturnsProperty: 2825 value: t.Optional[exp.Expression] 2826 null = None 2827 is_table = self._match(TokenType.TABLE) 2828 2829 if is_table: 2830 if self._match(TokenType.LT): 2831 value = self.expression( 2832 exp.Schema, 2833 this="TABLE", 2834 expressions=self._parse_csv(self._parse_struct_types), 2835 ) 2836 if not self._match(TokenType.GT): 2837 self.raise_error("Expecting >") 2838 else: 2839 value = self._parse_schema(exp.var("TABLE")) 2840 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2841 null = True 2842 value = None 2843 else: 2844 value = self._parse_types() 2845 2846 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2847 2848 def _parse_describe(self) -> exp.Describe: 2849 kind = self._match_set(self.CREATABLES) and self._prev.text 2850 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2851 if self._match(TokenType.DOT): 2852 style = None 2853 self._retreat(self._index - 2) 2854 2855 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2856 2857 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2858 this = self._parse_statement() 2859 else: 2860 this = self._parse_table(schema=True) 2861 2862 properties = self._parse_properties() 2863 expressions = properties.expressions if properties else None 2864 partition = self._parse_partition() 2865 return self.expression( 2866 exp.Describe, 2867 this=this, 2868 style=style, 2869 kind=kind, 2870 expressions=expressions, 2871 partition=partition, 2872 format=format, 2873 ) 2874 2875 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2876 kind = self._prev.text.upper() 2877 expressions = [] 2878 2879 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2880 if self._match(TokenType.WHEN): 2881 expression = self._parse_disjunction() 2882 self._match(TokenType.THEN) 2883 else: 2884 expression = None 2885 2886 else_ = self._match(TokenType.ELSE) 2887 2888 if not self._match(TokenType.INTO): 2889 return None 2890 2891 return self.expression( 2892 exp.ConditionalInsert, 2893 this=self.expression( 2894 exp.Insert, 2895 this=self._parse_table(schema=True), 2896 expression=self._parse_derived_table_values(), 2897 ), 2898 expression=expression, 2899 else_=else_, 2900 ) 2901 2902 expression = parse_conditional_insert() 2903 while expression is not None: 2904 expressions.append(expression) 2905 expression = parse_conditional_insert() 2906 2907 return self.expression( 2908 exp.MultitableInserts, 2909 kind=kind, 2910 comments=comments, 2911 expressions=expressions, 2912 source=self._parse_table(), 2913 ) 2914 2915 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2916 comments = [] 2917 hint = self._parse_hint() 2918 overwrite = self._match(TokenType.OVERWRITE) 2919 ignore = self._match(TokenType.IGNORE) 2920 local = self._match_text_seq("LOCAL") 2921 alternative = None 2922 is_function = None 2923 2924 if self._match_text_seq("DIRECTORY"): 2925 this: t.Optional[exp.Expression] = self.expression( 2926 exp.Directory, 2927 this=self._parse_var_or_string(), 2928 local=local, 2929 row_format=self._parse_row_format(match_row=True), 2930 ) 2931 else: 2932 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2933 comments += ensure_list(self._prev_comments) 2934 return self._parse_multitable_inserts(comments) 2935 2936 if self._match(TokenType.OR): 2937 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2938 2939 self._match(TokenType.INTO) 2940 comments += ensure_list(self._prev_comments) 2941 self._match(TokenType.TABLE) 2942 is_function = self._match(TokenType.FUNCTION) 2943 2944 this = ( 2945 self._parse_table(schema=True, parse_partition=True) 2946 if not is_function 2947 else self._parse_function() 2948 ) 2949 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2950 this.set("alias", self._parse_table_alias()) 2951 2952 returning = self._parse_returning() 2953 2954 return self.expression( 2955 
exp.Insert, 2956 comments=comments, 2957 hint=hint, 2958 is_function=is_function, 2959 this=this, 2960 stored=self._match_text_seq("STORED") and self._parse_stored(), 2961 by_name=self._match_text_seq("BY", "NAME"), 2962 exists=self._parse_exists(), 2963 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2964 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2965 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2966 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2967 conflict=self._parse_on_conflict(), 2968 returning=returning or self._parse_returning(), 2969 overwrite=overwrite, 2970 alternative=alternative, 2971 ignore=ignore, 2972 source=self._match(TokenType.TABLE) and self._parse_table(), 2973 ) 2974 2975 def _parse_kill(self) -> exp.Kill: 2976 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2977 2978 return self.expression( 2979 exp.Kill, 2980 this=self._parse_primary(), 2981 kind=kind, 2982 ) 2983 2984 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2985 conflict = self._match_text_seq("ON", "CONFLICT") 2986 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2987 2988 if not conflict and not duplicate: 2989 return None 2990 2991 conflict_keys = None 2992 constraint = None 2993 2994 if conflict: 2995 if self._match_text_seq("ON", "CONSTRAINT"): 2996 constraint = self._parse_id_var() 2997 elif self._match(TokenType.L_PAREN): 2998 conflict_keys = self._parse_csv(self._parse_id_var) 2999 self._match_r_paren() 3000 3001 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3002 if self._prev.token_type == TokenType.UPDATE: 3003 self._match(TokenType.SET) 3004 expressions = self._parse_csv(self._parse_equality) 3005 else: 3006 expressions = None 3007 3008 return self.expression( 3009 exp.OnConflict, 3010 duplicate=duplicate, 3011 expressions=expressions, 3012 action=action, 3013 conflict_keys=conflict_keys, 3014 constraint=constraint, 3015 where=self._parse_where(), 3016 ) 3017 3018 def _parse_returning(self) -> t.Optional[exp.Returning]: 3019 if not self._match(TokenType.RETURNING): 3020 return None 3021 return self.expression( 3022 exp.Returning, 3023 expressions=self._parse_csv(self._parse_expression), 3024 into=self._match(TokenType.INTO) and self._parse_table_part(), 3025 ) 3026 3027 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3028 if not self._match(TokenType.FORMAT): 3029 return None 3030 return self._parse_row_format() 3031 3032 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3033 index = self._index 3034 with_ = with_ or self._match_text_seq("WITH") 3035 3036 if not self._match(TokenType.SERDE_PROPERTIES): 3037 self._retreat(index) 3038 return None 3039 return self.expression( 3040 exp.SerdeProperties, 3041 **{ # type: ignore 3042 "expressions": self._parse_wrapped_properties(), 3043 "with": with_, 3044 }, 3045 ) 3046 3047 def _parse_row_format( 3048 self, match_row: bool = False 3049 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3050 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3051 return None 3052 3053 if self._match_text_seq("SERDE"): 3054 this = self._parse_string() 3055 3056 serde_properties = self._parse_serde_properties() 3057 3058 return self.expression( 3059 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3060 ) 3061 3062 self._match_text_seq("DELIMITED") 3063 3064 kwargs = {} 3065 3066 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3067 kwargs["fields"] = self._parse_string() 3068 if self._match_text_seq("ESCAPED", "BY"): 3069 kwargs["escaped"] = self._parse_string() 3070 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3071 kwargs["collection_items"] = self._parse_string() 3072 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3073 kwargs["map_keys"] = self._parse_string() 3074 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3075 kwargs["lines"] = self._parse_string() 3076 if self._match_text_seq("NULL", "DEFINED", "AS"): 3077 kwargs["null"] = self._parse_string() 3078 3079 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3080 3081 def _parse_load(self) -> exp.LoadData | exp.Command: 3082 if self._match_text_seq("DATA"): 3083 local = self._match_text_seq("LOCAL") 3084 self._match_text_seq("INPATH") 3085 inpath = self._parse_string() 3086 overwrite = self._match(TokenType.OVERWRITE) 3087 self._match_pair(TokenType.INTO, TokenType.TABLE) 3088 3089 return self.expression( 3090 exp.LoadData, 3091 this=self._parse_table(schema=True), 3092 local=local, 3093 overwrite=overwrite, 3094 inpath=inpath, 3095 partition=self._parse_partition(), 3096 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3097 serde=self._match_text_seq("SERDE") and self._parse_string(), 3098 ) 3099 return self._parse_as_command(self._prev) 3100 3101 def _parse_delete(self) -> exp.Delete: 3102 # This handles MySQL's "Multiple-Table Syntax" 3103 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3104 tables = None 3105 if not self._match(TokenType.FROM, advance=False): 3106 tables = self._parse_csv(self._parse_table) or None 3107 3108 returning = self._parse_returning() 3109 3110 return self.expression( 3111 exp.Delete, 3112 tables=tables, 3113 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3114 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3115 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3116 where=self._parse_where(), 3117 returning=returning or self._parse_returning(), 3118 limit=self._parse_limit(), 3119 ) 3120 3121 def _parse_update(self) -> exp.Update: 3122 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3123 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3124 returning = self._parse_returning() 3125 return self.expression( 3126 exp.Update, 3127 **{ # type: ignore 3128 "this": this, 3129 "expressions": expressions, 3130 "from": self._parse_from(joins=True), 3131 "where": self._parse_where(), 3132 "returning": returning or self._parse_returning(), 3133 "order": self._parse_order(), 3134 "limit": self._parse_limit(), 3135 }, 3136 ) 3137 3138 def _parse_use(self) -> exp.Use: 3139 return self.expression( 3140 exp.Use, 3141 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3142 this=self._parse_table(schema=False), 3143 ) 3144 3145 def _parse_uncache(self) -> exp.Uncache: 3146 if not self._match(TokenType.TABLE): 3147 self.raise_error("Expecting TABLE after UNCACHE") 3148 3149 return self.expression( 3150 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3151 ) 3152 3153 def _parse_cache(self) -> exp.Cache: 3154 lazy = self._match_text_seq("LAZY") 3155 self._match(TokenType.TABLE) 3156 table = 
self._parse_table(schema=True) 3157 3158 options = [] 3159 if self._match_text_seq("OPTIONS"): 3160 self._match_l_paren() 3161 k = self._parse_string() 3162 self._match(TokenType.EQ) 3163 v = self._parse_string() 3164 options = [k, v] 3165 self._match_r_paren() 3166 3167 self._match(TokenType.ALIAS) 3168 return self.expression( 3169 exp.Cache, 3170 this=table, 3171 lazy=lazy, 3172 options=options, 3173 expression=self._parse_select(nested=True), 3174 ) 3175 3176 def _parse_partition(self) -> t.Optional[exp.Partition]: 3177 if not self._match_texts(self.PARTITION_KEYWORDS): 3178 return None 3179 3180 return self.expression( 3181 exp.Partition, 3182 subpartition=self._prev.text.upper() == "SUBPARTITION", 3183 expressions=self._parse_wrapped_csv(self._parse_assignment), 3184 ) 3185 3186 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3187 def _parse_value_expression() -> t.Optional[exp.Expression]: 3188 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3189 return exp.var(self._prev.text.upper()) 3190 return self._parse_expression() 3191 3192 if self._match(TokenType.L_PAREN): 3193 expressions = self._parse_csv(_parse_value_expression) 3194 self._match_r_paren() 3195 return self.expression(exp.Tuple, expressions=expressions) 3196 3197 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3198 expression = self._parse_expression() 3199 if expression: 3200 return self.expression(exp.Tuple, expressions=[expression]) 3201 return None 3202 3203 def _parse_projections(self) -> t.List[exp.Expression]: 3204 return self._parse_expressions() 3205 3206 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3207 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3208 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3209 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3210 ) 3211 elif self._match(TokenType.FROM): 3212 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3213 # Support parentheses for duckdb FROM-first syntax 3214 select = self._parse_select(from_=from_) 3215 if select: 3216 if not select.args.get("from"): 3217 select.set("from", from_) 3218 this = select 3219 else: 3220 this = exp.select("*").from_(t.cast(exp.From, from_)) 3221 else: 3222 this = ( 3223 self._parse_table(consume_pipe=True) 3224 if table 3225 else self._parse_select(nested=True, parse_set_operation=False) 3226 ) 3227 3228 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3229 # in case a modifier (e.g. 
join) is following 3230 if table and isinstance(this, exp.Values) and this.alias: 3231 alias = this.args["alias"].pop() 3232 this = exp.Table(this=this, alias=alias) 3233 3234 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3235 3236 return this 3237 3238 def _parse_select( 3239 self, 3240 nested: bool = False, 3241 table: bool = False, 3242 parse_subquery_alias: bool = True, 3243 parse_set_operation: bool = True, 3244 consume_pipe: bool = True, 3245 from_: t.Optional[exp.From] = None, 3246 ) -> t.Optional[exp.Expression]: 3247 query = self._parse_select_query( 3248 nested=nested, 3249 table=table, 3250 parse_subquery_alias=parse_subquery_alias, 3251 parse_set_operation=parse_set_operation, 3252 ) 3253 3254 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3255 if not query and from_: 3256 query = exp.select("*").from_(from_) 3257 if isinstance(query, exp.Query): 3258 query = self._parse_pipe_syntax_query(query) 3259 query = query.subquery(copy=False) if query and table else query 3260 3261 return query 3262 3263 def _parse_select_query( 3264 self, 3265 nested: bool = False, 3266 table: bool = False, 3267 parse_subquery_alias: bool = True, 3268 parse_set_operation: bool = True, 3269 ) -> t.Optional[exp.Expression]: 3270 cte = self._parse_with() 3271 3272 if cte: 3273 this = self._parse_statement() 3274 3275 if not this: 3276 self.raise_error("Failed to parse any statement following CTE") 3277 return cte 3278 3279 if "with" in this.arg_types: 3280 this.set("with", cte) 3281 else: 3282 self.raise_error(f"{this.key} does not support CTE") 3283 this = cte 3284 3285 return this 3286 3287 # duckdb supports leading with FROM x 3288 from_ = ( 3289 self._parse_from(consume_pipe=True) 3290 if self._match(TokenType.FROM, advance=False) 3291 else None 3292 ) 3293 3294 if self._match(TokenType.SELECT): 3295 comments = self._prev_comments 3296 3297 hint = self._parse_hint() 3298 3299 if self._next and not self._next.token_type == TokenType.DOT: 3300 all_ = self._match(TokenType.ALL) 3301 distinct = self._match_set(self.DISTINCT_TOKENS) 3302 else: 3303 all_, distinct = None, None 3304 3305 kind = ( 3306 self._match(TokenType.ALIAS) 3307 and self._match_texts(("STRUCT", "VALUE")) 3308 and self._prev.text.upper() 3309 ) 3310 3311 if distinct: 3312 distinct = self.expression( 3313 exp.Distinct, 3314 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3315 ) 3316 3317 if all_ and distinct: 3318 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3319 3320 operation_modifiers = [] 3321 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3322 operation_modifiers.append(exp.var(self._prev.text.upper())) 3323 3324 limit = self._parse_limit(top=True) 3325 projections = self._parse_projections() 3326 3327 this = self.expression( 3328 exp.Select, 3329 kind=kind, 3330 hint=hint, 3331 distinct=distinct, 3332 expressions=projections, 3333 limit=limit, 3334 operation_modifiers=operation_modifiers or None, 3335 ) 3336 this.comments = comments 3337 3338 into = self._parse_into() 3339 if into: 3340 this.set("into", into) 3341 3342 if not from_: 3343 from_ = self._parse_from() 3344 3345 if from_: 3346 this.set("from", from_) 3347 3348 this = self._parse_query_modifiers(this) 3349 elif (table or nested) and self._match(TokenType.L_PAREN): 3350 this = self._parse_wrapped_select(table=table) 3351 3352 # We return early here so that the UNION isn't attached to the subquery by the 3353 # following call to _parse_set_operations, but 

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
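
    # Editor's addition, a small hedged illustration of the CTE path: the
    # [NOT] MATERIALIZED flag parsed in `_parse_cte` is stored on the CTE node
    # (exact arg layout may differ across sqlglot versions).
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sql = "WITH t AS MATERIALIZED (SELECT 1 AS x) SELECT x FROM t"
    #   >>> sqlglot.parse_one(sql, read="postgres").find(exp.CTE).args.get("materialized")
    #   True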

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
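
    # Editor's addition, a hedged sketch of the implicit-unnest rewrite above as
    # it applies to BigQuery's correlated "FROM t, t.arr" form (the exact output
    # shape may vary by sqlglot version):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT a FROM t, t.arr", read="bigquery").sql("bigquery")
    #   'SELECT a FROM t, UNNEST(t.arr)'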

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )
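
    # Editor's addition, a hedged example of hint parsing: Spark-style hints
    # travel as comments until TokenType.HINT hands them to `_parse_hint`.
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one("SELECT /*+ BROADCAST(b) */ a FROM t", read="spark")
    #   >>> isinstance(ast.args.get("hint"), exp.Hint)
    #   True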

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
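
    # Editor's addition, a hedged MATCH_RECOGNIZE sketch (Snowflake accepts the
    # clause; the PATTERN body is captured verbatim as an exp.Var by the
    # paren-balancing loop above):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sql = (
    #   ...     "SELECT * FROM t MATCH_RECOGNIZE ("
    #   ...     "PARTITION BY a ORDER BY b PATTERN (x y+) DEFINE y AS y.b > x.b)"
    #   ... )
    #   >>> sqlglot.parse_one(sql, read="snowflake").find(exp.MatchRecognize) is not None
    #   True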

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            kwargs["on"] = exp.true()

        return self.expression(exp.Join, comments=comments, **kwargs)
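
    # Editor's addition, a hedged sketch of the USING handling above: USING
    # column references are stored as bare identifiers on the Join node.
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> join = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)").find(exp.Join)
    #   >>> [type(e).__name__ for e in join.args["using"]]
    #   ['Identifier']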

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
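
    # Editor's addition: the catalog/db/name split performed above is visible
    # through the standard exp.Table accessors (public API only).
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.tbl").find(exp.Table)
    #   >>> tbl.catalog, tbl.db, tbl.name
    #   ('c', 'd', 'tbl')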

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )
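
    # Editor's addition, a hedged sketch of the Snowflake time-travel clause
    # handled by `_parse_historical_data` above; AT(...) is attached to the
    # table's "when" arg:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sql = "SELECT * FROM t AT(TIMESTAMP => '2024-01-01')"
    #   >>> sqlglot.parse_one(sql, read="snowflake").find(exp.HistoricalData) is not None
    #   True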

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)
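
    # Editor's addition, a hedged TABLESAMPLE sketch (T-SQL spelling; the
    # percent-vs-size resolution follows the dialect flags used above):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)", read="tsql")
    #   >>> ast.find(exp.TableSample) is not None
    #   True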

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            if self._prev and self._prev.token_type == TokenType.COMMA:
                return None
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)
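
    # Editor's addition, a hedged sketch of the simplified (DuckDB-style) PIVOT
    # that `_parse_simplified_pivot` above handles as a standalone statement:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")
    #   >>> ast.find(exp.Pivot) is not None
    #   True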

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None
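
    # Editor's addition, a small example of the grouping machinery above, using
    # the default dialect:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
    #   >>> ast.find(exp.Rollup) is not None
    #   True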

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)
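
    # Editor's addition, an example of `_parse_ordered` above: an explicit
    # NULLS FIRST sets nulls_first on the Ordered node, and the dialect's
    # NULL_ORDERING fills it in when the query leaves it implicit.
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> ast = sqlglot.parse_one("SELECT a FROM t ORDER BY a NULLS FIRST")
    #   >>> ast.find(exp.Ordered).args["nulls_first"]
    #   True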

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks
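
    # Editor's addition, a hedged sketch of the comma form handled in
    # `_parse_limit` above (MySQL's LIMIT offset, count); the first term becomes
    # the offset:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT * FROM t LIMIT 5, 10", read="mysql", write="postgres")[0]
    #   'SELECT * FROM t LIMIT 10 OFFSET 5'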

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
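
    # Editor's addition: a bare UNION defaults to DISTINCT in most dialects,
    # which is why distinct=True lands on the node built above.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT 1 UNION SELECT 2").args["distinct"]
    #   True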

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
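
    # Editor's addition, a hedged sketch of `_parse_is` above: IS [NOT]
    # DISTINCT FROM maps to the null-safe comparisons (note the inversion,
    # since "DISTINCT FROM" is the null-safe "not equal"):
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("SELECT a IS DISTINCT FROM b").selects[0]).__name__
    #   'NullSafeNEQ'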

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() in ("IS", "ROWS")
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
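
    # Editor's addition, an example of the canonicalization above: a numeric
    # interval quantity is folded into the string form INTERVAL '5' DAY.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT INTERVAL 5 day").sql()
    #   "SELECT INTERVAL '5' DAY"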

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns,
                # otherwise fall back to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
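
    # Illustrative sketch (not part of the original source): the factor/term split
    # above yields standard arithmetic precedence, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("1 + 2 * 3")
    #   assert isinstance(ast, exp.Add) and isinstance(ast.expression, exp.Mul)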

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)
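
    # Illustrative sketch (not part of the original source): per the comment in
    # `_parse_type`, BigQuery's inline constructor is canonicalized to a cast, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT STRUCT<a INT>(1)", read="bigquery")
    #   # the constructor should surface as an exp.Cast of the values to the
    #   # STRUCT type; the rendered SQL may differ slightly by version.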

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions = self._parse_vector_expressions(expressions)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of
                # the data type, e.g. in DuckDB ARRAY[1] should retreat and instead be parsed into
                # exp.Array, in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_vector_expressions(
        self, expressions: t.List[exp.Expression]
    ) -> t.List[exp.Expression]:
        return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]]
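
    # Illustrative sketch (not part of the original source): the bracket loop above
    # distinguishes empty-array casts from fixed-size array types, e.g.:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT ARRAY[]::INT[]", read="postgres")  # cast of an empty array
    #   sqlglot.parse_one("SELECT CAST(x AS INT ARRAY[3])", read="postgres")  # fixed-size type
    #
    # Behavior is dialect-dependent (see SUPPORTS_FIXED_SIZE_ARRAYS).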

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is
            # also a type token. Without this, the list will be parsed as a type and we'll
            # eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
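
    # Illustrative sketch (not part of the original source): `_parse_at_time_zone`
    # above turns the postfix modifier into an exp.AtTimeZone node, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT ts AT TIME ZONE 'UTC'")
    #   assert isinstance(ast.selects[0], exp.AtTimeZone)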
col:"a'b") as 5652 # it'll roundtrip to a string literal in GET_PATH 5653 if isinstance(path, exp.Identifier) and path.quoted: 5654 escape = True 5655 5656 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5657 5658 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5659 # Databricks transforms it back to the colon/dot notation 5660 if json_path: 5661 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5662 5663 if json_path_expr: 5664 json_path_expr.set("escape", escape) 5665 5666 this = self.expression( 5667 exp.JSONExtract, 5668 this=this, 5669 expression=json_path_expr, 5670 variant_extract=True, 5671 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5672 ) 5673 5674 while casts: 5675 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5676 5677 return this 5678 5679 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5680 return self._parse_types() 5681 5682 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5683 this = self._parse_bracket(this) 5684 5685 while self._match_set(self.COLUMN_OPERATORS): 5686 op_token = self._prev.token_type 5687 op = self.COLUMN_OPERATORS.get(op_token) 5688 5689 if op_token in self.CAST_COLUMN_OPERATORS: 5690 field = self._parse_dcolon() 5691 if not field: 5692 self.raise_error("Expected type") 5693 elif op and self._curr: 5694 field = self._parse_column_reference() or self._parse_bracket() 5695 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5696 field = self._parse_column_ops(field) 5697 else: 5698 field = self._parse_field(any_token=True, anonymous_func=True) 5699 5700 # Function calls can be qualified, e.g., x.y.FOO() 5701 # This converts the final AST to a series of Dots leading to the function call 5702 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5703 if isinstance(field, (exp.Func, exp.Window)) and this: 5704 this = this.transform( 5705 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5706 ) 5707 5708 if op: 5709 this = op(self, this, field) 5710 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5711 this = self.expression( 5712 exp.Column, 5713 comments=this.comments, 5714 this=field, 5715 table=this.this, 5716 db=this.args.get("table"), 5717 catalog=this.args.get("db"), 5718 ) 5719 elif isinstance(field, exp.Window): 5720 # Move the exp.Dot's to the window's function 5721 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5722 field.set("this", window_func) 5723 this = field 5724 else: 5725 this = self.expression(exp.Dot, this=this, expression=field) 5726 5727 if field and field.comments: 5728 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5729 5730 this = self._parse_bracket(this) 5731 5732 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5733 5734 def _parse_paren(self) -> t.Optional[exp.Expression]: 5735 if not self._match(TokenType.L_PAREN): 5736 return None 5737 5738 comments = self._prev_comments 5739 query = self._parse_select() 5740 5741 if query: 5742 expressions = [query] 5743 else: 5744 expressions = self._parse_expressions() 5745 5746 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5747 5748 if not this and self._match(TokenType.R_PAREN, advance=False): 5749 this = self.expression(exp.Tuple) 5750 elif isinstance(this, 

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]:
        return self._parse_csv(lambda: self._parse_lambda(alias=alias))

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_function_args(alias)

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(
                        exp.PropertyEQ, this=e.args.get("alias"), expression=e.this
                    )

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed
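
    # Illustrative sketch (not part of the original source): NO_PAREN_FUNCTIONS lets
    # keywords like CURRENT_DATE parse as function expressions without parentheses:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT CURRENT_DATE")
    #   assert isinstance(ast.selects[0], exp.CurrentDate)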

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
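
    # Illustrative sketch (not part of the original source): `_parse_lambda` handles
    # arrow-style lambdas in function arguments, e.g. in DuckDB:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT LIST_TRANSFORM([1, 2], x -> x + 1)", read="duckdb")
    #   # the `x -> x + 1` argument should parse into an exp.Lambda via self.LAMBDAS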

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                data_type=exp.Var(this="AUTO")
                if self._match_text_seq("AUTO")
                else self._parse_types(),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
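
    # Illustrative sketch (not part of the original source): `_parse_generated_as_identity`
    # is exercised by identity column definitions, e.g. in Postgres:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))",
    #       read="postgres",
    #   )
    #   # the constraint's `start` and `increment` args should be set to 1 and 2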

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
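
    # Illustrative sketch (not part of the original source): REFERENCES clauses and
    # their ON <event> <action> options flow through the two methods above, e.g.:
    #
    #   import sqlglot
    #   sqlglot.parse_one("CREATE TABLE t (a INT REFERENCES p (id) ON DELETE CASCADE)")
    #   # the column constraint should carry an exp.Reference whose options
    #   # include "ON DELETE CASCADE"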

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime literal in ODBC format. We parse the literal into the corresponding
        expression type, for example `{d'yyyy-mm-dd'}` is parsed as a `Date` expression,
        exactly the same as `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this
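
    # Illustrative sketch (not part of the original source): ODBC literals are
    # routed here from `_parse_bracket`, e.g. in T-SQL, which supports this syntax:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT {d '2024-01-01'}", read="tsql")
    #   # {d '...'} should parse into an exp.Date, the same AST produced by
    #   # DATE('2024-01-01'); exact support can vary by dialect and version.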

    def _parse_case(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)
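
    # Illustrative sketch (not part of the original source): `_parse_extract` above
    # accepts both the FROM and the comma-separated argument forms, e.g.:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT EXTRACT(YEAR FROM ts)")
    #   assert isinstance(ast.selects[0], exp.Extract)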
fmt.args["zone"]) 6626 return this 6627 elif not to: 6628 self.raise_error("Expected TYPE after CAST") 6629 elif isinstance(to, exp.Identifier): 6630 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6631 elif to.this == exp.DataType.Type.CHAR: 6632 if self._match(TokenType.CHARACTER_SET): 6633 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6634 6635 return self.build_cast( 6636 strict=strict, 6637 this=this, 6638 to=to, 6639 format=fmt, 6640 safe=safe, 6641 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6642 default=default, 6643 ) 6644 6645 def _parse_string_agg(self) -> exp.GroupConcat: 6646 if self._match(TokenType.DISTINCT): 6647 args: t.List[t.Optional[exp.Expression]] = [ 6648 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6649 ] 6650 if self._match(TokenType.COMMA): 6651 args.extend(self._parse_csv(self._parse_assignment)) 6652 else: 6653 args = self._parse_csv(self._parse_assignment) # type: ignore 6654 6655 if self._match_text_seq("ON", "OVERFLOW"): 6656 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6657 if self._match_text_seq("ERROR"): 6658 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6659 else: 6660 self._match_text_seq("TRUNCATE") 6661 on_overflow = self.expression( 6662 exp.OverflowTruncateBehavior, 6663 this=self._parse_string(), 6664 with_count=( 6665 self._match_text_seq("WITH", "COUNT") 6666 or not self._match_text_seq("WITHOUT", "COUNT") 6667 ), 6668 ) 6669 else: 6670 on_overflow = None 6671 6672 index = self._index 6673 if not self._match(TokenType.R_PAREN) and args: 6674 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6675 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6676 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6677 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6678 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6679 6680 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6681 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6682 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 

    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
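
    # Illustrative sketch (not part of the original source): the WITHIN GROUP
    # canonicalization in `_parse_string_agg` means, e.g. for Oracle-style LISTAGG:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t", read="oracle"
    #   )
    #   # should yield an exp.GroupConcat with the ORDER BY folded into `this`,
    #   # easing transpilation to MySQL / SQLite GROUP_CONCAT.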

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite
        # order (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )
self._parse_json_schema() 6893 6894 return exp.JSONTable( 6895 this=this, 6896 schema=schema, 6897 path=path, 6898 error_handling=error_handling, 6899 empty_handling=empty_handling, 6900 ) 6901 6902 def _parse_match_against(self) -> exp.MatchAgainst: 6903 if self._match_text_seq("TABLE"): 6904 # parse SingleStore MATCH(TABLE ...) syntax 6905 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6906 expressions = [] 6907 table = self._parse_table() 6908 if table: 6909 expressions = [table] 6910 else: 6911 expressions = self._parse_csv(self._parse_column) 6912 6913 self._match_text_seq(")", "AGAINST", "(") 6914 6915 this = self._parse_string() 6916 6917 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6918 modifier = "IN NATURAL LANGUAGE MODE" 6919 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6920 modifier = f"{modifier} WITH QUERY EXPANSION" 6921 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6922 modifier = "IN BOOLEAN MODE" 6923 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6924 modifier = "WITH QUERY EXPANSION" 6925 else: 6926 modifier = None 6927 6928 return self.expression( 6929 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6930 ) 6931 6932 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6933 def _parse_open_json(self) -> exp.OpenJSON: 6934 this = self._parse_bitwise() 6935 path = self._match(TokenType.COMMA) and self._parse_string() 6936 6937 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6938 this = self._parse_field(any_token=True) 6939 kind = self._parse_types() 6940 path = self._parse_string() 6941 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6942 6943 return self.expression( 6944 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6945 ) 6946 6947 expressions = None 6948 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6949 self._match_l_paren() 6950 expressions = self._parse_csv(_parse_open_json_column_def) 6951 6952 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6953 6954 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6955 args = self._parse_csv(self._parse_bitwise) 6956 6957 if self._match(TokenType.IN): 6958 return self.expression( 6959 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6960 ) 6961 6962 if haystack_first: 6963 haystack = seq_get(args, 0) 6964 needle = seq_get(args, 1) 6965 else: 6966 haystack = seq_get(args, 1) 6967 needle = seq_get(args, 0) 6968 6969 return self.expression( 6970 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6971 ) 6972 6973 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6974 args = self._parse_csv(self._parse_table) 6975 return exp.JoinHint(this=func_name.upper(), expressions=args) 6976 6977 def _parse_substring(self) -> exp.Substring: 6978 # Postgres supports the form: substring(string [from int] [for int]) 6979 # (despite being undocumented, the reverse order also works) 6980 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6981 6982 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6983 6984 start, length = None, None 6985 6986 while self._curr: 6987 if self._match(TokenType.FROM): 6988 start = self._parse_bitwise() 6989 elif self._match(TokenType.FOR): 6990 if not start: 6991 start = exp.Literal.number(1) 6992 length = self._parse_bitwise() 6993 
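            # Illustrative sketch: in SUBSTRING('abc' FROM 2 FOR 1), FROM sets the
            # start and FOR the length; when only FOR is given, a start of 1 is
            # implied by the fallback above.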
            else:
                break

        if start:
            args.append(start)
        if length:
            args.append(length)

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER clause;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # The code above in _parse_lambda handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
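        # For illustration (a sketch, not tied to one dialect's exact AST), both
        #
        #   parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
        #   parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
        #
        # normalize so that IgnoreNulls wraps the aggregate before the Window node
        # is built.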
7082 7083 # Oracle allows both formats 7084 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7085 # and Snowflake chose to do the same for familiarity 7086 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7087 if isinstance(this, exp.AggFunc): 7088 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7089 7090 if ignore_respect and ignore_respect is not this: 7091 ignore_respect.replace(ignore_respect.this) 7092 this = self.expression(ignore_respect.__class__, this=this) 7093 7094 this = self._parse_respect_or_ignore_nulls(this) 7095 7096 # bigquery select from window x AS (partition by ...) 7097 if alias: 7098 over = None 7099 self._match(TokenType.ALIAS) 7100 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7101 return this 7102 else: 7103 over = self._prev.text.upper() 7104 7105 if comments and isinstance(func, exp.Expression): 7106 func.pop_comments() 7107 7108 if not self._match(TokenType.L_PAREN): 7109 return self.expression( 7110 exp.Window, 7111 comments=comments, 7112 this=this, 7113 alias=self._parse_id_var(False), 7114 over=over, 7115 ) 7116 7117 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7118 7119 first = self._match(TokenType.FIRST) 7120 if self._match_text_seq("LAST"): 7121 first = False 7122 7123 partition, order = self._parse_partition_and_order() 7124 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7125 7126 if kind: 7127 self._match(TokenType.BETWEEN) 7128 start = self._parse_window_spec() 7129 7130 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7131 exclude = ( 7132 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7133 if self._match_text_seq("EXCLUDE") 7134 else None 7135 ) 7136 7137 spec = self.expression( 7138 exp.WindowSpec, 7139 kind=kind, 7140 start=start["value"], 7141 start_side=start["side"], 7142 end=end.get("value"), 7143 end_side=end.get("side"), 7144 exclude=exclude, 7145 ) 7146 else: 7147 spec = None 7148 7149 self._match_r_paren() 7150 7151 window = self.expression( 7152 exp.Window, 7153 comments=comments, 7154 this=this, 7155 partition_by=partition, 7156 order=order, 7157 spec=spec, 7158 alias=window_alias, 7159 over=over, 7160 first=first, 7161 ) 7162 7163 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
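        # e.g. (illustrative) MAX(sal) KEEP (DENSE_RANK LAST ORDER BY hiredate) OVER
        # (PARTITION BY dept): the KEEP (...) part is parsed as one window and the
        # trailing OVER (...) as another via the recursive call below.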
7164 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7165 return self._parse_window(window, alias=alias) 7166 7167 return window 7168 7169 def _parse_partition_and_order( 7170 self, 7171 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7172 return self._parse_partition_by(), self._parse_order() 7173 7174 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7175 self._match(TokenType.BETWEEN) 7176 7177 return { 7178 "value": ( 7179 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7180 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7181 or self._parse_type() 7182 ), 7183 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7184 } 7185 7186 def _parse_alias( 7187 self, this: t.Optional[exp.Expression], explicit: bool = False 7188 ) -> t.Optional[exp.Expression]: 7189 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7190 # so this section tries to parse the clause version and if it fails, it treats the token 7191 # as an identifier (alias) 7192 if self._can_parse_limit_or_offset(): 7193 return this 7194 7195 any_token = self._match(TokenType.ALIAS) 7196 comments = self._prev_comments or [] 7197 7198 if explicit and not any_token: 7199 return this 7200 7201 if self._match(TokenType.L_PAREN): 7202 aliases = self.expression( 7203 exp.Aliases, 7204 comments=comments, 7205 this=this, 7206 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7207 ) 7208 self._match_r_paren(aliases) 7209 return aliases 7210 7211 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7212 self.STRING_ALIASES and self._parse_string_as_identifier() 7213 ) 7214 7215 if alias: 7216 comments.extend(alias.pop_comments()) 7217 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7218 column = this.this 7219 7220 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7221 if not this.comments and column and column.comments: 7222 this.comments = column.pop_comments() 7223 7224 return this 7225 7226 def _parse_id_var( 7227 self, 7228 any_token: bool = True, 7229 tokens: t.Optional[t.Collection[TokenType]] = None, 7230 ) -> t.Optional[exp.Expression]: 7231 expression = self._parse_identifier() 7232 if not expression and ( 7233 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7234 ): 7235 quoted = self._prev.token_type == TokenType.STRING 7236 expression = self._identifier_expression(quoted=quoted) 7237 7238 return expression 7239 7240 def _parse_string(self) -> t.Optional[exp.Expression]: 7241 if self._match_set(self.STRING_PARSERS): 7242 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7243 return self._parse_placeholder() 7244 7245 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7246 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7247 if output: 7248 output.update_positions(self._prev) 7249 return output 7250 7251 def _parse_number(self) -> t.Optional[exp.Expression]: 7252 if self._match_set(self.NUMERIC_PARSERS): 7253 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7254 return self._parse_placeholder() 7255 7256 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7257 if self._match(TokenType.IDENTIFIER): 7258 return self._identifier_expression(quoted=True) 7259 return self._parse_placeholder() 7260 7261 def _parse_var( 7262 self, 7263 any_token: bool = False, 7264 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7265 upper: bool = False, 7266 ) -> t.Optional[exp.Expression]: 7267 if ( 7268 (any_token and self._advance_any()) 7269 or self._match(TokenType.VAR) 7270 or (self._match_set(tokens) if tokens else False) 7271 ): 7272 return self.expression( 7273 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7274 ) 7275 return self._parse_placeholder() 7276 7277 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7278 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7279 self._advance() 7280 return self._prev 7281 return None 7282 7283 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7284 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7285 7286 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7287 return self._parse_primary() or self._parse_var(any_token=True) 7288 7289 def _parse_null(self) -> t.Optional[exp.Expression]: 7290 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7291 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7292 return self._parse_placeholder() 7293 7294 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7295 if self._match(TokenType.TRUE): 7296 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7297 if self._match(TokenType.FALSE): 7298 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7299 return self._parse_placeholder() 7300 7301 def _parse_star(self) -> t.Optional[exp.Expression]: 7302 if self._match(TokenType.STAR): 7303 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7304 return self._parse_placeholder() 7305 7306 def _parse_parameter(self) -> exp.Parameter: 7307 this = self._parse_identifier() or self._parse_primary_or_var() 7308 return self.expression(exp.Parameter, this=this) 7309 7310 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7311 if self._match_set(self.PLACEHOLDER_PARSERS): 7312 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7313 if placeholder: 7314 return placeholder 7315 self._advance(-1) 7316 return None 7317 7318 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7319 if not self._match_texts(keywords): 7320 return None 7321 if self._match(TokenType.L_PAREN, advance=False): 7322 return self._parse_wrapped_csv(self._parse_expression) 7323 7324 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7325 return [expression] if expression else None 7326 7327 def _parse_csv( 7328 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7329 ) -> t.List[exp.Expression]: 7330 parse_result = parse_method() 7331 items = [parse_result] if parse_result is not None else [] 7332 7333 while self._match(sep): 7334 self._add_comments(parse_result) 7335 parse_result = parse_method() 7336 if parse_result is not None: 7337 items.append(parse_result) 7338 7339 return items 7340 7341 def _parse_tokens( 7342 self, parse_method: t.Callable, expressions: t.Dict 7343 ) -> t.Optional[exp.Expression]: 7344 this = parse_method() 7345 7346 while self._match_set(expressions): 7347 this = self.expression( 7348 expressions[self._prev.token_type], 7349 this=this, 7350 comments=self._prev_comments, 7351 expression=parse_method(), 7352 ) 7353 7354 return this 7355 7356 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7357 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7358 7359 def 
_parse_wrapped_csv( 7360 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7361 ) -> t.List[exp.Expression]: 7362 return self._parse_wrapped( 7363 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7364 ) 7365 7366 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7367 wrapped = self._match(TokenType.L_PAREN) 7368 if not wrapped and not optional: 7369 self.raise_error("Expecting (") 7370 parse_result = parse_method() 7371 if wrapped: 7372 self._match_r_paren() 7373 return parse_result 7374 7375 def _parse_expressions(self) -> t.List[exp.Expression]: 7376 return self._parse_csv(self._parse_expression) 7377 7378 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7379 return ( 7380 self._parse_set_operations( 7381 self._parse_alias(self._parse_assignment(), explicit=True) 7382 if alias 7383 else self._parse_assignment() 7384 ) 7385 or self._parse_select() 7386 ) 7387 7388 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7389 return self._parse_query_modifiers( 7390 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7391 ) 7392 7393 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7394 this = None 7395 if self._match_texts(self.TRANSACTION_KIND): 7396 this = self._prev.text 7397 7398 self._match_texts(("TRANSACTION", "WORK")) 7399 7400 modes = [] 7401 while True: 7402 mode = [] 7403 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7404 mode.append(self._prev.text) 7405 7406 if mode: 7407 modes.append(" ".join(mode)) 7408 if not self._match(TokenType.COMMA): 7409 break 7410 7411 return self.expression(exp.Transaction, this=this, modes=modes) 7412 7413 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7414 chain = None 7415 savepoint = None 7416 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7417 7418 self._match_texts(("TRANSACTION", "WORK")) 7419 7420 if self._match_text_seq("TO"): 7421 self._match_text_seq("SAVEPOINT") 7422 savepoint = self._parse_id_var() 7423 7424 if self._match(TokenType.AND): 7425 chain = not self._match_text_seq("NO") 7426 self._match_text_seq("CHAIN") 7427 7428 if is_rollback: 7429 return self.expression(exp.Rollback, savepoint=savepoint) 7430 7431 return self.expression(exp.Commit, chain=chain) 7432 7433 def _parse_refresh(self) -> exp.Refresh: 7434 self._match(TokenType.TABLE) 7435 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7436 7437 def _parse_column_def_with_exists(self): 7438 start = self._index 7439 self._match(TokenType.COLUMN) 7440 7441 exists_column = self._parse_exists(not_=True) 7442 expression = self._parse_field_def() 7443 7444 if not isinstance(expression, exp.ColumnDef): 7445 self._retreat(start) 7446 return None 7447 7448 expression.set("exists", exists_column) 7449 7450 return expression 7451 7452 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7453 if not self._prev.text.upper() == "ADD": 7454 return None 7455 7456 expression = self._parse_column_def_with_exists() 7457 if not expression: 7458 return None 7459 7460 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7461 if self._match_texts(("FIRST", "AFTER")): 7462 position = self._prev.text 7463 column_position = self.expression( 7464 exp.ColumnPosition, this=self._parse_column(), position=position 7465 ) 7466 expression.set("position", column_position) 7467 7468 return 
expression 7469 7470 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7471 drop = self._match(TokenType.DROP) and self._parse_drop() 7472 if drop and not isinstance(drop, exp.Command): 7473 drop.set("kind", drop.args.get("kind", "COLUMN")) 7474 return drop 7475 7476 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7477 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7478 return self.expression( 7479 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7480 ) 7481 7482 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7483 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7484 self._match_text_seq("ADD") 7485 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7486 return self.expression( 7487 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7488 ) 7489 7490 column_def = self._parse_add_column() 7491 if isinstance(column_def, exp.ColumnDef): 7492 return column_def 7493 7494 exists = self._parse_exists(not_=True) 7495 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7496 return self.expression( 7497 exp.AddPartition, 7498 exists=exists, 7499 this=self._parse_field(any_token=True), 7500 location=self._match_text_seq("LOCATION", advance=False) 7501 and self._parse_property(), 7502 ) 7503 7504 return None 7505 7506 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7507 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7508 or self._match_text_seq("COLUMNS") 7509 ): 7510 schema = self._parse_schema() 7511 7512 return ( 7513 ensure_list(schema) 7514 if schema 7515 else self._parse_csv(self._parse_column_def_with_exists) 7516 ) 7517 7518 return self._parse_csv(_parse_add_alteration) 7519 7520 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7521 if self._match_texts(self.ALTER_ALTER_PARSERS): 7522 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7523 7524 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7525 # keyword after ALTER we default to parsing this statement 7526 self._match(TokenType.COLUMN) 7527 column = self._parse_field(any_token=True) 7528 7529 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7530 return self.expression(exp.AlterColumn, this=column, drop=True) 7531 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7532 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7533 if self._match(TokenType.COMMENT): 7534 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7535 if self._match_text_seq("DROP", "NOT", "NULL"): 7536 return self.expression( 7537 exp.AlterColumn, 7538 this=column, 7539 drop=True, 7540 allow_null=True, 7541 ) 7542 if self._match_text_seq("SET", "NOT", "NULL"): 7543 return self.expression( 7544 exp.AlterColumn, 7545 this=column, 7546 allow_null=False, 7547 ) 7548 7549 if self._match_text_seq("SET", "VISIBLE"): 7550 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7551 if self._match_text_seq("SET", "INVISIBLE"): 7552 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7553 7554 self._match_text_seq("SET", "DATA") 7555 self._match_text_seq("TYPE") 7556 return self.expression( 7557 exp.AlterColumn, 7558 this=column, 7559 dtype=self._parse_types(), 7560 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7561 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7562 ) 7563 7564 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7565 if self._match_texts(("ALL", "EVEN", "AUTO")): 7566 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7567 7568 self._match_text_seq("KEY", "DISTKEY") 7569 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7570 7571 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7572 if compound: 7573 self._match_text_seq("SORTKEY") 7574 7575 if self._match(TokenType.L_PAREN, advance=False): 7576 return self.expression( 7577 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7578 ) 7579 7580 self._match_texts(("AUTO", "NONE")) 7581 return self.expression( 7582 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7583 ) 7584 7585 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7586 index = self._index - 1 7587 7588 partition_exists = self._parse_exists() 7589 if self._match(TokenType.PARTITION, advance=False): 7590 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7591 7592 self._retreat(index) 7593 return self._parse_csv(self._parse_drop_column) 7594 7595 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7596 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7597 exists = self._parse_exists() 7598 old_column = self._parse_column() 7599 to = self._match_text_seq("TO") 7600 new_column = self._parse_column() 7601 7602 if old_column is None or to is None or new_column is None: 7603 return None 7604 7605 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7606 7607 self._match_text_seq("TO") 7608 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7609 7610 def _parse_alter_table_set(self) -> exp.AlterSet: 7611 alter_set = self.expression(exp.AlterSet) 7612 7613 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7614 "TABLE", "PROPERTIES" 7615 ): 7616 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7617 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7618 alter_set.set("expressions", [self._parse_assignment()]) 7619 elif self._match_texts(("LOGGED", "UNLOGGED")): 7620 alter_set.set("option", exp.var(self._prev.text.upper())) 7621 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7622 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7623 elif self._match_text_seq("LOCATION"): 7624 alter_set.set("location", self._parse_field()) 7625 elif self._match_text_seq("ACCESS", "METHOD"): 7626 alter_set.set("access_method", self._parse_field()) 7627 elif self._match_text_seq("TABLESPACE"): 7628 alter_set.set("tablespace", self._parse_field()) 7629 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7630 alter_set.set("file_format", [self._parse_field()]) 7631 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7632 alter_set.set("file_format", self._parse_wrapped_options()) 7633 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7634 alter_set.set("copy_options", self._parse_wrapped_options()) 7635 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7636 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7637 else: 7638 if self._match_text_seq("SERDE"): 7639 alter_set.set("serde", self._parse_field()) 7640 7641 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7642 alter_set.set("expressions", [properties]) 7643 7644 return alter_set 7645 7646 def _parse_alter_session(self) -> exp.AlterSession: 7647 """Parse ALTER SESSION SET/UNSET statements.""" 7648 if self._match(TokenType.SET): 7649 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7650 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7651 7652 self._match_text_seq("UNSET") 7653 expressions = self._parse_csv( 7654 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7655 ) 7656 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7657 7658 def _parse_alter(self) -> exp.Alter | exp.Command: 7659 start = self._prev 7660 7661 alter_token = self._match_set(self.ALTERABLES) and self._prev 7662 if not alter_token: 7663 return self._parse_as_command(start) 7664 7665 exists = self._parse_exists() 7666 only = self._match_text_seq("ONLY") 7667 7668 if alter_token.token_type == TokenType.SESSION: 7669 this = None 7670 check = None 7671 cluster = None 7672 else: 7673 this = self._parse_table(schema=True) 7674 check = self._match_text_seq("WITH", "CHECK") 7675 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7676 7677 if self._next: 7678 self._advance() 7679 7680 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7681 if parser: 7682 actions = ensure_list(parser(self)) 7683 not_valid = self._match_text_seq("NOT", "VALID") 7684 options = self._parse_csv(self._parse_property) 7685 7686 if not self._curr and actions: 7687 return self.expression( 7688 exp.Alter, 7689 this=this, 7690 kind=alter_token.text.upper(), 7691 exists=exists, 7692 actions=actions, 7693 only=only, 7694 options=options, 7695 cluster=cluster, 7696 not_valid=not_valid, 7697 check=check, 7698 ) 7699 7700 return self._parse_as_command(start) 7701 7702 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7703 start = self._prev 7704 # https://duckdb.org/docs/sql/statements/analyze 7705 if not self._curr: 7706 return self.expression(exp.Analyze) 7707 7708 options = [] 7709 while self._match_texts(self.ANALYZE_STYLES): 7710 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7711 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7712 else: 7713 options.append(self._prev.text.upper()) 7714 7715 this: t.Optional[exp.Expression] = None 7716 inner_expression: t.Optional[exp.Expression] = None 7717 7718 kind = self._curr and self._curr.text.upper() 7719 7720 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7721 this = self._parse_table_parts() 7722 elif self._match_text_seq("TABLES"): 7723 if self._match_set((TokenType.FROM, TokenType.IN)): 7724 kind = f"{kind} {self._prev.text.upper()}" 7725 this = self._parse_table(schema=True, is_db_reference=True) 7726 elif self._match_text_seq("DATABASE"): 7727 this = self._parse_table(schema=True, is_db_reference=True) 7728 elif self._match_text_seq("CLUSTER"): 7729 this = self._parse_table() 7730 # Try matching inner expr keywords before fallback to parse table. 
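        # e.g. (illustrative) in ANALYZE ... COMPUTE STATISTICS, the COMPUTE keyword
        # is dispatched through ANALYZE_EXPRESSION_PARSERS here, whereas a bare
        # Postgres/DuckDB-style ANALYZE some_table falls through to the else branch.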
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            # Pass a tuple here: _match_texts performs a membership test, so a bare
            # string would also match any of its substrings.
            if self._match_texts(("COLUMNS",)):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind =
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7828 if self._match_text_seq("STATISTICS"): 7829 return self.expression(exp.AnalyzeDelete, kind=kind) 7830 return None 7831 7832 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7833 if self._match_text_seq("CHAINED", "ROWS"): 7834 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7835 return None 7836 7837 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7838 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7839 this = self._prev.text.upper() 7840 expression: t.Optional[exp.Expression] = None 7841 expressions = [] 7842 update_options = None 7843 7844 if self._match_text_seq("HISTOGRAM", "ON"): 7845 expressions = self._parse_csv(self._parse_column_reference) 7846 with_expressions = [] 7847 while self._match(TokenType.WITH): 7848 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7849 if self._match_texts(("SYNC", "ASYNC")): 7850 if self._match_text_seq("MODE", advance=False): 7851 with_expressions.append(f"{self._prev.text.upper()} MODE") 7852 self._advance() 7853 else: 7854 buckets = self._parse_number() 7855 if self._match_text_seq("BUCKETS"): 7856 with_expressions.append(f"{buckets} BUCKETS") 7857 if with_expressions: 7858 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7859 7860 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7861 TokenType.UPDATE, advance=False 7862 ): 7863 update_options = self._prev.text.upper() 7864 self._advance() 7865 elif self._match_text_seq("USING", "DATA"): 7866 expression = self.expression(exp.UsingData, this=self._parse_string()) 7867 7868 return self.expression( 7869 exp.AnalyzeHistogram, 7870 this=this, 7871 expressions=expressions, 7872 expression=expression, 7873 update_options=update_options, 7874 ) 7875 7876 def _parse_merge(self) -> exp.Merge: 7877 self._match(TokenType.INTO) 7878 target = self._parse_table() 7879 7880 if target and self._match(TokenType.ALIAS, advance=False): 7881 target.set("alias", self._parse_table_alias()) 7882 7883 self._match(TokenType.USING) 7884 using = self._parse_table() 7885 7886 self._match(TokenType.ON) 7887 on = self._parse_assignment() 7888 7889 return self.expression( 7890 exp.Merge, 7891 this=target, 7892 using=using, 7893 on=on, 7894 whens=self._parse_when_matched(), 7895 returning=self._parse_returning(), 7896 ) 7897 7898 def _parse_when_matched(self) -> exp.Whens: 7899 whens = [] 7900 7901 while self._match(TokenType.WHEN): 7902 matched = not self._match(TokenType.NOT) 7903 self._match_text_seq("MATCHED") 7904 source = ( 7905 False 7906 if self._match_text_seq("BY", "TARGET") 7907 else self._match_text_seq("BY", "SOURCE") 7908 ) 7909 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7910 7911 self._match(TokenType.THEN) 7912 7913 if self._match(TokenType.INSERT): 7914 this = self._parse_star() 7915 if this: 7916 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7917 else: 7918 then = self.expression( 7919 exp.Insert, 7920 this=exp.var("ROW") 7921 if self._match_text_seq("ROW") 7922 else self._parse_value(values=False), 7923 expression=self._match_text_seq("VALUES") and self._parse_value(), 7924 ) 7925 elif self._match(TokenType.UPDATE): 7926 expressions = self._parse_star() 7927 if expressions: 7928 then = self.expression(exp.Update, expressions=expressions) 7929 else: 7930 then = self.expression( 7931 exp.Update, 7932 
expressions=self._match(TokenType.SET) 7933 and self._parse_csv(self._parse_equality), 7934 ) 7935 elif self._match(TokenType.DELETE): 7936 then = self.expression(exp.Var, this=self._prev.text) 7937 else: 7938 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7939 7940 whens.append( 7941 self.expression( 7942 exp.When, 7943 matched=matched, 7944 source=source, 7945 condition=condition, 7946 then=then, 7947 ) 7948 ) 7949 return self.expression(exp.Whens, expressions=whens) 7950 7951 def _parse_show(self) -> t.Optional[exp.Expression]: 7952 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7953 if parser: 7954 return parser(self) 7955 return self._parse_as_command(self._prev) 7956 7957 def _parse_set_item_assignment( 7958 self, kind: t.Optional[str] = None 7959 ) -> t.Optional[exp.Expression]: 7960 index = self._index 7961 7962 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7963 return self._parse_set_transaction(global_=kind == "GLOBAL") 7964 7965 left = self._parse_primary() or self._parse_column() 7966 assignment_delimiter = self._match_texts(("=", "TO")) 7967 7968 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7969 self._retreat(index) 7970 return None 7971 7972 right = self._parse_statement() or self._parse_id_var() 7973 if isinstance(right, (exp.Column, exp.Identifier)): 7974 right = exp.var(right.name) 7975 7976 this = self.expression(exp.EQ, this=left, expression=right) 7977 return self.expression(exp.SetItem, this=this, kind=kind) 7978 7979 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7980 self._match_text_seq("TRANSACTION") 7981 characteristics = self._parse_csv( 7982 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7983 ) 7984 return self.expression( 7985 exp.SetItem, 7986 expressions=characteristics, 7987 kind="TRANSACTION", 7988 **{"global": global_}, # type: ignore 7989 ) 7990 7991 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7992 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7993 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7994 7995 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7996 index = self._index 7997 set_ = self.expression( 7998 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7999 ) 8000 8001 if self._curr: 8002 self._retreat(index) 8003 return self._parse_as_command(self._prev) 8004 8005 return set_ 8006 8007 def _parse_var_from_options( 8008 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8009 ) -> t.Optional[exp.Var]: 8010 start = self._curr 8011 if not start: 8012 return None 8013 8014 option = start.text.upper() 8015 continuations = options.get(option) 8016 8017 index = self._index 8018 self._advance() 8019 for keywords in continuations or []: 8020 if isinstance(keywords, str): 8021 keywords = (keywords,) 8022 8023 if self._match_text_seq(*keywords): 8024 option = f"{option} {' '.join(keywords)}" 8025 break 8026 else: 8027 if continuations or continuations is None: 8028 if raise_unmatched: 8029 self.raise_error(f"Unknown option {option}") 8030 8031 self._retreat(index) 8032 return None 8033 8034 return exp.var(option) 8035 8036 def _parse_as_command(self, start: Token) -> exp.Command: 8037 while self._curr: 8038 self._advance() 8039 text = self._find_sql(start, self._prev) 8040 size = len(start.text) 8041 self._warn_unsupported() 8042 return exp.Command(this=text[:size], 
expression=text[size:]) 8043 8044 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8045 settings = [] 8046 8047 self._match_l_paren() 8048 kind = self._parse_id_var() 8049 8050 if self._match(TokenType.L_PAREN): 8051 while True: 8052 key = self._parse_id_var() 8053 value = self._parse_primary() 8054 if not key and value is None: 8055 break 8056 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8057 self._match(TokenType.R_PAREN) 8058 8059 self._match_r_paren() 8060 8061 return self.expression( 8062 exp.DictProperty, 8063 this=this, 8064 kind=kind.this if kind else None, 8065 settings=settings, 8066 ) 8067 8068 def _parse_dict_range(self, this: str) -> exp.DictRange: 8069 self._match_l_paren() 8070 has_min = self._match_text_seq("MIN") 8071 if has_min: 8072 min = self._parse_var() or self._parse_primary() 8073 self._match_text_seq("MAX") 8074 max = self._parse_var() or self._parse_primary() 8075 else: 8076 max = self._parse_var() or self._parse_primary() 8077 min = exp.Literal.number(0) 8078 self._match_r_paren() 8079 return self.expression(exp.DictRange, this=this, min=min, max=max) 8080 8081 def _parse_comprehension( 8082 self, this: t.Optional[exp.Expression] 8083 ) -> t.Optional[exp.Comprehension]: 8084 index = self._index 8085 expression = self._parse_column() 8086 if not self._match(TokenType.IN): 8087 self._retreat(index - 1) 8088 return None 8089 iterator = self._parse_column() 8090 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8091 return self.expression( 8092 exp.Comprehension, 8093 this=this, 8094 expression=expression, 8095 iterator=iterator, 8096 condition=condition, 8097 ) 8098 8099 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8100 if self._match(TokenType.HEREDOC_STRING): 8101 return self.expression(exp.Heredoc, this=self._prev.text) 8102 8103 if not self._match_text_seq("$"): 8104 return None 8105 8106 tags = ["$"] 8107 tag_text = None 8108 8109 if self._is_connected(): 8110 self._advance() 8111 tags.append(self._prev.text.upper()) 8112 else: 8113 self.raise_error("No closing $ found") 8114 8115 if tags[-1] != "$": 8116 if self._is_connected() and self._match_text_seq("$"): 8117 tag_text = tags[-1] 8118 tags.append("$") 8119 else: 8120 self.raise_error("No closing $ found") 8121 8122 heredoc_start = self._curr 8123 8124 while self._curr: 8125 if self._match_text_seq(*tags, advance=False): 8126 this = self._find_sql(heredoc_start, self._prev) 8127 self._advance(len(tags)) 8128 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8129 8130 self._advance() 8131 8132 self.raise_error(f"No closing {''.join(tags)} found") 8133 return None 8134 8135 def _find_parser( 8136 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8137 ) -> t.Optional[t.Callable]: 8138 if not self._curr: 8139 return None 8140 8141 index = self._index 8142 this = [] 8143 while True: 8144 # The current token might be multiple words 8145 curr = self._curr.text.upper() 8146 key = curr.split(" ") 8147 this.append(curr) 8148 8149 self._advance() 8150 result, trie = in_trie(trie, key) 8151 if result == TrieResult.FAILED: 8152 break 8153 8154 if result == TrieResult.EXISTS: 8155 subparser = parsers[" ".join(this)] 8156 return subparser 8157 8158 self._retreat(index) 8159 return None 8160 8161 def _match(self, token_type, advance=True, expression=None): 8162 if not self._curr: 8163 return None 8164 8165 if self._curr.token_type == token_type: 8166 if advance: 8167 self._advance() 8168 self._add_comments(expression) 8169 return 
True 8170 8171 return None 8172 8173 def _match_set(self, types, advance=True): 8174 if not self._curr: 8175 return None 8176 8177 if self._curr.token_type in types: 8178 if advance: 8179 self._advance() 8180 return True 8181 8182 return None 8183 8184 def _match_pair(self, token_type_a, token_type_b, advance=True): 8185 if not self._curr or not self._next: 8186 return None 8187 8188 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8189 if advance: 8190 self._advance(2) 8191 return True 8192 8193 return None 8194 8195 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8196 if not self._match(TokenType.L_PAREN, expression=expression): 8197 self.raise_error("Expecting (") 8198 8199 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8200 if not self._match(TokenType.R_PAREN, expression=expression): 8201 self.raise_error("Expecting )") 8202 8203 def _match_texts(self, texts, advance=True): 8204 if ( 8205 self._curr 8206 and self._curr.token_type != TokenType.STRING 8207 and self._curr.text.upper() in texts 8208 ): 8209 if advance: 8210 self._advance() 8211 return True 8212 return None 8213 8214 def _match_text_seq(self, *texts, advance=True): 8215 index = self._index 8216 for text in texts: 8217 if ( 8218 self._curr 8219 and self._curr.token_type != TokenType.STRING 8220 and self._curr.text.upper() == text 8221 ): 8222 self._advance() 8223 else: 8224 self._retreat(index) 8225 return None 8226 8227 if not advance: 8228 self._retreat(index) 8229 8230 return True 8231 8232 def _replace_lambda( 8233 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8234 ) -> t.Optional[exp.Expression]: 8235 if not node: 8236 return node 8237 8238 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8239 8240 for column in node.find_all(exp.Column): 8241 typ = lambda_types.get(column.parts[0].name) 8242 if typ is not None: 8243 dot_or_id = column.to_dot() if column.table else column.this 8244 8245 if typ: 8246 dot_or_id = self.expression( 8247 exp.Cast, 8248 this=dot_or_id, 8249 to=typ, 8250 ) 8251 8252 parent = column.parent 8253 8254 while isinstance(parent, exp.Dot): 8255 if not isinstance(parent.parent, exp.Dot): 8256 parent.replace(dot_or_id) 8257 break 8258 parent = parent.parent 8259 else: 8260 if column is node: 8261 node = dot_or_id 8262 else: 8263 column.replace(dot_or_id) 8264 return node 8265 8266 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8267 start = self._prev 8268 8269 # Not to be confused with TRUNCATE(number, decimals) function call 8270 if self._match(TokenType.L_PAREN): 8271 self._retreat(self._index - 2) 8272 return self._parse_function() 8273 8274 # Clickhouse supports TRUNCATE DATABASE as well 8275 is_database = self._match(TokenType.DATABASE) 8276 8277 self._match(TokenType.TABLE) 8278 8279 exists = self._parse_exists(not_=False) 8280 8281 expressions = self._parse_csv( 8282 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8283 ) 8284 8285 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8286 8287 if self._match_text_seq("RESTART", "IDENTITY"): 8288 identity = "RESTART" 8289 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8290 identity = "CONTINUE" 8291 else: 8292 identity = None 8293 8294 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8295 option = self._prev.text 8296 else: 8297 option = None 8298 8299 partition = self._parse_partition() 
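        # By this point a statement such as (illustrative, Postgres-style)
        #   TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE
        # has been consumed in full; any trailing tokens trigger the fallback below.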
8300 8301 # Fallback case 8302 if self._curr: 8303 return self._parse_as_command(start) 8304 8305 return self.expression( 8306 exp.TruncateTable, 8307 expressions=expressions, 8308 is_database=is_database, 8309 exists=exists, 8310 cluster=cluster, 8311 identity=identity, 8312 option=option, 8313 partition=partition, 8314 ) 8315 8316 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8317 this = self._parse_ordered(self._parse_opclass) 8318 8319 if not self._match(TokenType.WITH): 8320 return this 8321 8322 op = self._parse_var(any_token=True) 8323 8324 return self.expression(exp.WithOperator, this=this, op=op) 8325 8326 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8327 self._match(TokenType.EQ) 8328 self._match(TokenType.L_PAREN) 8329 8330 opts: t.List[t.Optional[exp.Expression]] = [] 8331 option: exp.Expression | None 8332 while self._curr and not self._match(TokenType.R_PAREN): 8333 if self._match_text_seq("FORMAT_NAME", "="): 8334 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8335 option = self._parse_format_name() 8336 else: 8337 option = self._parse_property() 8338 8339 if option is None: 8340 self.raise_error("Unable to parse option") 8341 break 8342 8343 opts.append(option) 8344 8345 return opts 8346 8347 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8348 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8349 8350 options = [] 8351 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8352 option = self._parse_var(any_token=True) 8353 prev = self._prev.text.upper() 8354 8355 # Different dialects might separate options and values by white space, "=" and "AS" 8356 self._match(TokenType.EQ) 8357 self._match(TokenType.ALIAS) 8358 8359 param = self.expression(exp.CopyParameter, this=option) 8360 8361 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8362 TokenType.L_PAREN, advance=False 8363 ): 8364 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8365 param.set("expressions", self._parse_wrapped_options()) 8366 elif prev == "FILE_FORMAT": 8367 # T-SQL's external file format case 8368 param.set("expression", self._parse_field()) 8369 else: 8370 param.set("expression", self._parse_unquoted_field()) 8371 8372 options.append(param) 8373 self._match(sep) 8374 8375 return options 8376 8377 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8378 expr = self.expression(exp.Credentials) 8379 8380 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8381 expr.set("storage", self._parse_field()) 8382 if self._match_text_seq("CREDENTIALS"): 8383 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8384 creds = ( 8385 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8386 ) 8387 expr.set("credentials", creds) 8388 if self._match_text_seq("ENCRYPTION"): 8389 expr.set("encryption", self._parse_wrapped_options()) 8390 if self._match_text_seq("IAM_ROLE"): 8391 expr.set("iam_role", self._parse_field()) 8392 if self._match_text_seq("REGION"): 8393 expr.set("region", self._parse_field()) 8394 8395 return expr 8396 8397 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8398 return self._parse_field() 8399 8400 def _parse_copy(self) -> exp.Copy | exp.Command: 8401 start = self._prev 8402 8403 self._match(TokenType.INTO) 8404 8405 this = ( 8406 self._parse_select(nested=True, parse_subquery_alias=False) 8407 if self._match(TokenType.L_PAREN, advance=False) 8408 else self._parse_table(schema=True) 
8409 ) 8410 8411 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8412 8413 files = self._parse_csv(self._parse_file_location) 8414 if self._match(TokenType.EQ, advance=False): 8415 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8416 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8417 # list via `_parse_wrapped(..)` below. 8418 self._advance(-1) 8419 files = [] 8420 8421 credentials = self._parse_credentials() 8422 8423 self._match_text_seq("WITH") 8424 8425 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8426 8427 # Fallback case 8428 if self._curr: 8429 return self._parse_as_command(start) 8430 8431 return self.expression( 8432 exp.Copy, 8433 this=this, 8434 kind=kind, 8435 credentials=credentials, 8436 files=files, 8437 params=params, 8438 ) 8439 8440 def _parse_normalize(self) -> exp.Normalize: 8441 return self.expression( 8442 exp.Normalize, 8443 this=self._parse_bitwise(), 8444 form=self._match(TokenType.COMMA) and self._parse_var(), 8445 ) 8446 8447 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8448 args = self._parse_csv(lambda: self._parse_lambda()) 8449 8450 this = seq_get(args, 0) 8451 decimals = seq_get(args, 1) 8452 8453 return expr_type( 8454 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8455 ) 8456 8457 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8458 star_token = self._prev 8459 8460 if self._match_text_seq("COLUMNS", "(", advance=False): 8461 this = self._parse_function() 8462 if isinstance(this, exp.Columns): 8463 this.set("unpack", True) 8464 return this 8465 8466 return self.expression( 8467 exp.Star, 8468 **{ # type: ignore 8469 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8470 "replace": self._parse_star_op("REPLACE"), 8471 "rename": self._parse_star_op("RENAME"), 8472 }, 8473 ).update_positions(star_token) 8474 8475 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8476 privilege_parts = [] 8477 8478 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8479 # (end of privilege list) or L_PAREN (start of column list) are met 8480 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8481 privilege_parts.append(self._curr.text.upper()) 8482 self._advance() 8483 8484 this = exp.var(" ".join(privilege_parts)) 8485 expressions = ( 8486 self._parse_wrapped_csv(self._parse_column) 8487 if self._match(TokenType.L_PAREN, advance=False) 8488 else None 8489 ) 8490 8491 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8492 8493 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8494 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8495 principal = self._parse_id_var() 8496 8497 if not principal: 8498 return None 8499 8500 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8501 8502 def _parse_grant_revoke_common( 8503 self, 8504 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8505 privileges = self._parse_csv(self._parse_grant_privilege) 8506 8507 self._match(TokenType.ON) 8508 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8509 8510 # Attempt to parse the securable e.g. 
MySQL allows names 8511 # such as "foo.*", "*.*" which are not easily parseable yet 8512 securable = self._try_parse(self._parse_table_parts) 8513 8514 return privileges, kind, securable 8515 8516 def _parse_grant(self) -> exp.Grant | exp.Command: 8517 start = self._prev 8518 8519 privileges, kind, securable = self._parse_grant_revoke_common() 8520 8521 if not securable or not self._match_text_seq("TO"): 8522 return self._parse_as_command(start) 8523 8524 principals = self._parse_csv(self._parse_grant_principal) 8525 8526 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8527 8528 if self._curr: 8529 return self._parse_as_command(start) 8530 8531 return self.expression( 8532 exp.Grant, 8533 privileges=privileges, 8534 kind=kind, 8535 securable=securable, 8536 principals=principals, 8537 grant_option=grant_option, 8538 ) 8539 8540 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8541 start = self._prev 8542 8543 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8544 8545 privileges, kind, securable = self._parse_grant_revoke_common() 8546 8547 if not securable or not self._match_text_seq("FROM"): 8548 return self._parse_as_command(start) 8549 8550 principals = self._parse_csv(self._parse_grant_principal) 8551 8552 cascade = None 8553 if self._match_texts(("CASCADE", "RESTRICT")): 8554 cascade = self._prev.text.upper() 8555 8556 if self._curr: 8557 return self._parse_as_command(start) 8558 8559 return self.expression( 8560 exp.Revoke, 8561 privileges=privileges, 8562 kind=kind, 8563 securable=securable, 8564 principals=principals, 8565 grant_option=grant_option, 8566 cascade=cascade, 8567 ) 8568 8569 def _parse_overlay(self) -> exp.Overlay: 8570 return self.expression( 8571 exp.Overlay, 8572 **{ # type: ignore 8573 "this": self._parse_bitwise(), 8574 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8575 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8576 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8577 }, 8578 ) 8579 8580 def _parse_format_name(self) -> exp.Property: 8581 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8582 # for FILE_FORMAT = <format_name> 8583 return self.expression( 8584 exp.Property, 8585 this=exp.var("FORMAT_NAME"), 8586 value=self._parse_string() or self._parse_table_parts(), 8587 ) 8588 8589 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8590 args: t.List[exp.Expression] = [] 8591 8592 if self._match(TokenType.DISTINCT): 8593 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8594 self._match(TokenType.COMMA) 8595 8596 args.extend(self._parse_csv(self._parse_assignment)) 8597 8598 return self.expression( 8599 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8600 ) 8601 8602 def _identifier_expression( 8603 self, token: t.Optional[Token] = None, **kwargs: t.Any 8604 ) -> exp.Identifier: 8605 token = token or self._prev 8606 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8607 expression.update_positions(token) 8608 return expression 8609 8610 def _build_pipe_cte( 8611 self, 8612 query: exp.Query, 8613 expressions: t.List[exp.Expression], 8614 alias_cte: t.Optional[exp.TableAlias] = None, 8615 ) -> exp.Select: 8616 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8617 if alias_cte: 8618 new_cte = alias_cte 8619 else: 8620 self._pipe_cte_counter += 1 8621 new_cte = f"__tmp{self._pipe_cte_counter}" 8622 8623 with_ = 
query.args.get("with") 8624 ctes = with_.pop() if with_ else None 8625 8626 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8627 if ctes: 8628 new_select.set("with", ctes) 8629 8630 return new_select.with_(new_cte, as_=query, copy=False) 8631 8632 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8633 select = self._parse_select(consume_pipe=False) 8634 if not select: 8635 return query 8636 8637 return self._build_pipe_cte( 8638 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8639 ) 8640 8641 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8642 limit = self._parse_limit() 8643 offset = self._parse_offset() 8644 if limit: 8645 curr_limit = query.args.get("limit", limit) 8646 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8647 query.limit(limit, copy=False) 8648 if offset: 8649 curr_offset = query.args.get("offset") 8650 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8651 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8652 8653 return query 8654 8655 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8656 this = self._parse_assignment() 8657 if self._match_text_seq("GROUP", "AND", advance=False): 8658 return this 8659 8660 this = self._parse_alias(this) 8661 8662 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8663 return self._parse_ordered(lambda: this) 8664 8665 return this 8666 8667 def _parse_pipe_syntax_aggregate_group_order_by( 8668 self, query: exp.Select, group_by_exists: bool = True 8669 ) -> exp.Select: 8670 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8671 aggregates_or_groups, orders = [], [] 8672 for element in expr: 8673 if isinstance(element, exp.Ordered): 8674 this = element.this 8675 if isinstance(this, exp.Alias): 8676 element.set("this", this.args["alias"]) 8677 orders.append(element) 8678 else: 8679 this = element 8680 aggregates_or_groups.append(this) 8681 8682 if group_by_exists: 8683 query.select(*aggregates_or_groups, copy=False).group_by( 8684 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8685 copy=False, 8686 ) 8687 else: 8688 query.select(*aggregates_or_groups, append=False, copy=False) 8689 8690 if orders: 8691 return query.order_by(*orders, append=False, copy=False) 8692 8693 return query 8694 8695 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8696 self._match_text_seq("AGGREGATE") 8697 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8698 8699 if self._match(TokenType.GROUP_BY) or ( 8700 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8701 ): 8702 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8703 8704 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8705 8706 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8707 first_setop = self.parse_set_operation(this=query) 8708 if not first_setop: 8709 return None 8710 8711 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8712 expr = self._parse_paren() 8713 return expr.assert_is(exp.Subquery).unnest() if expr else None 8714 8715 first_setop.this.pop() 8716 8717 setops = [ 8718 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8719 *self._parse_csv(_parse_and_unwrap_query), 8720 ] 8721 8722 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8723 
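# The left-hand query was just wrapped into a CTE by _build_pipe_cte above; detach its
# WITH clause here so it can be re-attached on top of the combined set operation
# below, keeping the WITH clause outermost.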
with_ = query.args.get("with") 8724 ctes = with_.pop() if with_ else None 8725 8726 if isinstance(first_setop, exp.Union): 8727 query = query.union(*setops, copy=False, **first_setop.args) 8728 elif isinstance(first_setop, exp.Except): 8729 query = query.except_(*setops, copy=False, **first_setop.args) 8730 else: 8731 query = query.intersect(*setops, copy=False, **first_setop.args) 8732 8733 query.set("with", ctes) 8734 8735 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8736 8737 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8738 join = self._parse_join() 8739 if not join: 8740 return None 8741 8742 if isinstance(query, exp.Select): 8743 return query.join(join, copy=False) 8744 8745 return query 8746 8747 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8748 pivots = self._parse_pivots() 8749 if not pivots: 8750 return query 8751 8752 from_ = query.args.get("from") 8753 if from_: 8754 from_.this.set("pivots", pivots) 8755 8756 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8757 8758 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8759 self._match_text_seq("EXTEND") 8760 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8761 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8762 8763 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8764 sample = self._parse_table_sample() 8765 8766 with_ = query.args.get("with") 8767 if with_: 8768 with_.expressions[-1].this.set("sample", sample) 8769 else: 8770 query.set("sample", sample) 8771 8772 return query 8773 8774 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8775 if isinstance(query, exp.Subquery): 8776 query = exp.select("*").from_(query, copy=False) 8777 8778 if not query.args.get("from"): 8779 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8780 8781 while self._match(TokenType.PIPE_GT): 8782 start = self._curr 8783 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8784 if not parser: 8785 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8786 # keywords, making it tricky to disambiguate them without lookahead. The approach 8787 # here is to try and parse a set operation and if that fails, then try to parse a 8788 # join operator. If that fails as well, then the operator is not supported. 
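# For example, `|> UNION ALL (SELECT ...)` and `|> LEFT JOIN t USING (id)` both
# begin with keywords that are absent from PIPE_SYNTAX_TRANSFORM_PARSERS, so they
# are handled by this fallback.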
8789 parsed_query = self._parse_pipe_syntax_set_operator(query) 8790 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8791 if not parsed_query: 8792 self._retreat(start) 8793 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8794 break 8795 query = parsed_query 8796 else: 8797 query = parser(self, query) 8798 8799 return query 8800 8801 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8802 vars = self._parse_csv(self._parse_id_var) 8803 if not vars: 8804 return None 8805 8806 return self.expression( 8807 exp.DeclareItem, 8808 this=vars, 8809 kind=self._parse_types(), 8810 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8811 ) 8812 8813 def _parse_declare(self) -> exp.Declare | exp.Command: 8814 start = self._prev 8815 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8816 8817 if not expressions or self._curr: 8818 return self._parse_as_command(start) 8819 8820 return self.expression(exp.Declare, expressions=expressions) 8821 8822 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8823 exp_class = exp.Cast if strict else exp.TryCast 8824 8825 if exp_class == exp.TryCast: 8826 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8827 8828 return self.expression(exp_class, **kwargs) 8829 8830 def _parse_json_value(self) -> exp.JSONValue: 8831 this = self._parse_bitwise() 8832 self._match(TokenType.COMMA) 8833 path = self._parse_bitwise() 8834 8835 returning = self._match(TokenType.RETURNING) and self._parse_type() 8836 8837 return self.expression( 8838 exp.JSONValue, 8839 this=this, 8840 path=self.dialect.to_json_path(path), 8841 returning=returning, 8842 on_condition=self._parse_on_condition(), 8843 ) 8844 8845 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8846 def concat_exprs( 8847 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8848 ) -> exp.Expression: 8849 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8850 concat_exprs = [ 8851 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8852 ] 8853 node.set("expressions", concat_exprs) 8854 return node 8855 if len(exprs) == 1: 8856 return exprs[0] 8857 return self.expression(exp.Concat, expressions=exprs, safe=True) 8858 8859 args = self._parse_csv(self._parse_lambda) 8860 8861 if args: 8862 order = args[-1] if isinstance(args[-1], exp.Order) else None 8863 8864 if order: 8865 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8866 # remove 'expr' from exp.Order and add it back to args 8867 args[-1] = order.this 8868 order.set("this", concat_exprs(order.this, args)) 8869 8870 this = order or concat_exprs(args[0], args) 8871 else: 8872 this = None 8873 8874 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8875 8876 return self.expression(exp.GroupConcat, this=this, separator=separator)
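# --- Usage sketch (not part of the module source): how a GROUP_CONCAT call is
# shaped by _parse_group_concat. This assumes the MySQL dialect, which wires
# GROUP_CONCAT to this parser and supports the SEPARATOR clause.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT GROUP_CONCAT(DISTINCT a ORDER BY b SEPARATOR '-') FROM t", read="mysql"
)
agg = ast.find(exp.GroupConcat)
# `this` carries the distinct/ordered concatenation target; `separator` the '-' literal.
assert agg is not None and agg.args["separator"].this == "-"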
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
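# --- Usage sketch (not part of the module source): build_mod's parenthesizing in
# action. Parsing MOD(a + 1, 7) wraps the binary operand so operator precedence
# survives when the generator renders exp.Mod with the `%` operator.
from sqlglot import parse_one

print(parse_one("SELECT MOD(a + 1, 7)").sql())  # SELECT (a + 1) % 7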
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
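# --- Usage sketch (not part of the module source): a direct call mirroring how the
# parser invokes this builder. With the base Dialect, where
# HAS_DISTINCT_ARRAY_CONSTRUCTORS is False, no bracket_notation flag is recorded;
# dialects that distinguish ARRAY(..) from [..] use it to reproduce the original
# spelling.
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.tokens import TokenType

node = build_array_constructor(
    exp.Array, [exp.Literal.number(1), exp.Literal.number(2)], TokenType.L_BRACKET, Dialect()
)
assert node.args.get("bracket_notation") is None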
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
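# --- Usage sketch (not part of the module source): with two arguments, the
# dialect-supplied default_source_tz (when given) becomes the source zone; with
# three arguments, the standard from_arg_list constructor applies.
from sqlglot import exp

node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("created_at")],
    default_source_tz="UTC",
)
assert isinstance(node, exp.ConvertTimezone) and node.args["source_tz"].this == "UTC"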
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.LOCK, 536 TokenType.MERGE, 537 TokenType.NATURAL, 538 TokenType.NEXT, 539 TokenType.OFFSET, 540 TokenType.OPERATOR, 541 TokenType.ORDINALITY, 542 TokenType.OVERLAPS, 543 TokenType.OVERWRITE, 544 TokenType.PARTITION, 545 TokenType.PERCENT, 546 TokenType.PIVOT, 547 TokenType.PRAGMA, 548 TokenType.PUT, 549 TokenType.RANGE, 550 TokenType.RECURSIVE, 551 TokenType.REFERENCES, 552 TokenType.REFRESH, 553 TokenType.RENAME, 554 TokenType.REPLACE, 555 TokenType.RIGHT, 556 TokenType.ROLLUP, 557 TokenType.ROW, 558 TokenType.ROWS, 559 TokenType.SEMI, 560 TokenType.SET, 561 TokenType.SETTINGS, 562 TokenType.SHOW, 563 TokenType.TEMPORARY, 564 TokenType.TOP, 565 
TokenType.TRUE, 566 TokenType.TRUNCATE, 567 TokenType.UNIQUE, 568 TokenType.UNNEST, 569 TokenType.UNPIVOT, 570 TokenType.UPDATE, 571 TokenType.USE, 572 TokenType.VOLATILE, 573 TokenType.WINDOW, 574 *ALTERABLES, 575 *CREATABLES, 576 *SUBQUERY_PREDICATES, 577 *TYPE_TOKENS, 578 *NO_PAREN_FUNCTIONS, 579 } 580 ID_VAR_TOKENS.remove(TokenType.UNION) 581 582 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 583 TokenType.ANTI, 584 TokenType.ASOF, 585 TokenType.FULL, 586 TokenType.LEFT, 587 TokenType.LOCK, 588 TokenType.NATURAL, 589 TokenType.RIGHT, 590 TokenType.SEMI, 591 TokenType.WINDOW, 592 } 593 594 ALIAS_TOKENS = ID_VAR_TOKENS 595 596 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 597 598 ARRAY_CONSTRUCTORS = { 599 "ARRAY": exp.Array, 600 "LIST": exp.List, 601 } 602 603 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 604 605 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 606 607 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 608 609 FUNC_TOKENS = { 610 TokenType.COLLATE, 611 TokenType.COMMAND, 612 TokenType.CURRENT_DATE, 613 TokenType.CURRENT_DATETIME, 614 TokenType.CURRENT_SCHEMA, 615 TokenType.CURRENT_TIMESTAMP, 616 TokenType.CURRENT_TIME, 617 TokenType.CURRENT_USER, 618 TokenType.FILTER, 619 TokenType.FIRST, 620 TokenType.FORMAT, 621 TokenType.GET, 622 TokenType.GLOB, 623 TokenType.IDENTIFIER, 624 TokenType.INDEX, 625 TokenType.ISNULL, 626 TokenType.ILIKE, 627 TokenType.INSERT, 628 TokenType.LIKE, 629 TokenType.MERGE, 630 TokenType.NEXT, 631 TokenType.OFFSET, 632 TokenType.PRIMARY_KEY, 633 TokenType.RANGE, 634 TokenType.REPLACE, 635 TokenType.RLIKE, 636 TokenType.ROW, 637 TokenType.UNNEST, 638 TokenType.VAR, 639 TokenType.LEFT, 640 TokenType.RIGHT, 641 TokenType.SEQUENCE, 642 TokenType.DATE, 643 TokenType.DATETIME, 644 TokenType.TABLE, 645 TokenType.TIMESTAMP, 646 TokenType.TIMESTAMPTZ, 647 TokenType.TRUNCATE, 648 TokenType.UTC_DATE, 649 TokenType.UTC_TIME, 650 TokenType.UTC_TIMESTAMP, 651 TokenType.WINDOW, 652 TokenType.XOR, 653 *TYPE_TOKENS, 654 *SUBQUERY_PREDICATES, 655 } 656 657 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.AND: exp.And, 659 } 660 661 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 662 TokenType.COLON_EQ: exp.PropertyEQ, 663 } 664 665 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 666 TokenType.OR: exp.Or, 667 } 668 669 EQUALITY = { 670 TokenType.EQ: exp.EQ, 671 TokenType.NEQ: exp.NEQ, 672 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 673 } 674 675 COMPARISON = { 676 TokenType.GT: exp.GT, 677 TokenType.GTE: exp.GTE, 678 TokenType.LT: exp.LT, 679 TokenType.LTE: exp.LTE, 680 } 681 682 BITWISE = { 683 TokenType.AMP: exp.BitwiseAnd, 684 TokenType.CARET: exp.BitwiseXor, 685 TokenType.PIPE: exp.BitwiseOr, 686 } 687 688 TERM = { 689 TokenType.DASH: exp.Sub, 690 TokenType.PLUS: exp.Add, 691 TokenType.MOD: exp.Mod, 692 TokenType.COLLATE: exp.Collate, 693 } 694 695 FACTOR = { 696 TokenType.DIV: exp.IntDiv, 697 TokenType.LR_ARROW: exp.Distance, 698 TokenType.SLASH: exp.Div, 699 TokenType.STAR: exp.Mul, 700 } 701 702 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 703 704 TIMES = { 705 TokenType.TIME, 706 TokenType.TIMETZ, 707 } 708 709 TIMESTAMPS = { 710 TokenType.TIMESTAMP, 711 TokenType.TIMESTAMPNTZ, 712 TokenType.TIMESTAMPTZ, 713 TokenType.TIMESTAMPLTZ, 714 *TIMES, 715 } 716 717 SET_OPERATIONS = { 718 TokenType.UNION, 719 TokenType.INTERSECT, 720 TokenType.EXCEPT, 721 } 722 723 JOIN_METHODS = { 724 TokenType.ASOF, 725 TokenType.NATURAL, 726 TokenType.POSITIONAL, 727 } 728 729 JOIN_SIDES = { 730 TokenType.LEFT, 731 
TokenType.RIGHT, 732 TokenType.FULL, 733 } 734 735 JOIN_KINDS = { 736 TokenType.ANTI, 737 TokenType.CROSS, 738 TokenType.INNER, 739 TokenType.OUTER, 740 TokenType.SEMI, 741 TokenType.STRAIGHT_JOIN, 742 } 743 744 JOIN_HINTS: t.Set[str] = set() 745 746 LAMBDAS = { 747 TokenType.ARROW: lambda self, expressions: self.expression( 748 exp.Lambda, 749 this=self._replace_lambda( 750 self._parse_assignment(), 751 expressions, 752 ), 753 expressions=expressions, 754 ), 755 TokenType.FARROW: lambda self, expressions: self.expression( 756 exp.Kwarg, 757 this=exp.var(expressions[0].name), 758 expression=self._parse_assignment(), 759 ), 760 } 761 762 COLUMN_OPERATORS = { 763 TokenType.DOT: None, 764 TokenType.DOTCOLON: lambda self, this, to: self.expression( 765 exp.JSONCast, 766 this=this, 767 to=to, 768 ), 769 TokenType.DCOLON: lambda self, this, to: self.build_cast( 770 strict=self.STRICT_CAST, this=this, to=to 771 ), 772 TokenType.ARROW: lambda self, this, path: self.expression( 773 exp.JSONExtract, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.DARROW: lambda self, this, path: self.expression( 779 exp.JSONExtractScalar, 780 this=this, 781 expression=self.dialect.to_json_path(path), 782 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 783 ), 784 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtract, 786 this=this, 787 expression=path, 788 ), 789 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 790 exp.JSONBExtractScalar, 791 this=this, 792 expression=path, 793 ), 794 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 795 exp.JSONBContains, 796 this=this, 797 expression=key, 798 ), 799 } 800 801 CAST_COLUMN_OPERATORS = { 802 TokenType.DOTCOLON, 803 TokenType.DCOLON, 804 } 805 806 EXPRESSION_PARSERS = { 807 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 808 exp.Column: lambda self: self._parse_column(), 809 exp.Condition: lambda self: self._parse_assignment(), 810 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 811 exp.Expression: lambda self: self._parse_expression(), 812 exp.From: lambda self: self._parse_from(joins=True), 813 exp.Group: lambda self: self._parse_group(), 814 exp.Having: lambda self: self._parse_having(), 815 exp.Hint: lambda self: self._parse_hint_body(), 816 exp.Identifier: lambda self: self._parse_id_var(), 817 exp.Join: lambda self: self._parse_join(), 818 exp.Lambda: lambda self: self._parse_lambda(), 819 exp.Lateral: lambda self: self._parse_lateral(), 820 exp.Limit: lambda self: self._parse_limit(), 821 exp.Offset: lambda self: self._parse_offset(), 822 exp.Order: lambda self: self._parse_order(), 823 exp.Ordered: lambda self: self._parse_ordered(), 824 exp.Properties: lambda self: self._parse_properties(), 825 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 826 exp.Qualify: lambda self: self._parse_qualify(), 827 exp.Returning: lambda self: self._parse_returning(), 828 exp.Select: lambda self: self._parse_select(), 829 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 830 exp.Table: lambda self: self._parse_table_parts(), 831 exp.TableAlias: lambda self: self._parse_table_alias(), 832 exp.Tuple: lambda self: self._parse_value(values=False), 833 exp.Whens: lambda self: self._parse_when_matched(), 834 exp.Where: lambda self: self._parse_where(), 835 exp.Window: lambda self: self._parse_named_window(), 836 exp.With: 
lambda self: self._parse_with(), 837 "JOIN_TYPE": lambda self: self._parse_join_parts(), 838 } 839 840 STATEMENT_PARSERS = { 841 TokenType.ALTER: lambda self: self._parse_alter(), 842 TokenType.ANALYZE: lambda self: self._parse_analyze(), 843 TokenType.BEGIN: lambda self: self._parse_transaction(), 844 TokenType.CACHE: lambda self: self._parse_cache(), 845 TokenType.COMMENT: lambda self: self._parse_comment(), 846 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 847 TokenType.COPY: lambda self: self._parse_copy(), 848 TokenType.CREATE: lambda self: self._parse_create(), 849 TokenType.DELETE: lambda self: self._parse_delete(), 850 TokenType.DESC: lambda self: self._parse_describe(), 851 TokenType.DESCRIBE: lambda self: self._parse_describe(), 852 TokenType.DROP: lambda self: self._parse_drop(), 853 TokenType.GRANT: lambda self: self._parse_grant(), 854 TokenType.REVOKE: lambda self: self._parse_revoke(), 855 TokenType.INSERT: lambda self: self._parse_insert(), 856 TokenType.KILL: lambda self: self._parse_kill(), 857 TokenType.LOAD: lambda self: self._parse_load(), 858 TokenType.MERGE: lambda self: self._parse_merge(), 859 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 860 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 861 TokenType.REFRESH: lambda self: self._parse_refresh(), 862 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 863 TokenType.SET: lambda self: self._parse_set(), 864 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 865 TokenType.UNCACHE: lambda self: self._parse_uncache(), 866 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 867 TokenType.UPDATE: lambda self: self._parse_update(), 868 TokenType.USE: lambda self: self._parse_use(), 869 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 870 } 871 872 UNARY_PARSERS = { 873 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 874 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 875 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 876 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 877 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 878 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 879 } 880 881 STRING_PARSERS = { 882 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 883 exp.RawString, this=token.text 884 ), 885 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 886 exp.National, this=token.text 887 ), 888 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 889 TokenType.STRING: lambda self, token: self.expression( 890 exp.Literal, this=token.text, is_string=True 891 ), 892 TokenType.UNICODE_STRING: lambda self, token: self.expression( 893 exp.UnicodeString, 894 this=token.text, 895 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 896 ), 897 } 898 899 NUMERIC_PARSERS = { 900 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 901 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 902 TokenType.HEX_STRING: lambda self, token: self.expression( 903 exp.HexString, 904 this=token.text, 905 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 906 ), 907 TokenType.NUMBER: lambda self, token: self.expression( 908 
exp.Literal, this=token.text, is_string=False 909 ), 910 } 911 912 PRIMARY_PARSERS = { 913 **STRING_PARSERS, 914 **NUMERIC_PARSERS, 915 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 916 TokenType.NULL: lambda self, _: self.expression(exp.Null), 917 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 918 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 919 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 920 TokenType.STAR: lambda self, _: self._parse_star_ops(), 921 } 922 923 PLACEHOLDER_PARSERS = { 924 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 925 TokenType.PARAMETER: lambda self: self._parse_parameter(), 926 TokenType.COLON: lambda self: ( 927 self.expression(exp.Placeholder, this=self._prev.text) 928 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 929 else None 930 ), 931 } 932 933 RANGE_PARSERS = { 934 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 935 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 936 TokenType.GLOB: binary_range_parser(exp.Glob), 937 TokenType.ILIKE: binary_range_parser(exp.ILike), 938 TokenType.IN: lambda self, this: self._parse_in(this), 939 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 940 TokenType.IS: lambda self, this: self._parse_is(this), 941 TokenType.LIKE: binary_range_parser(exp.Like), 942 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 943 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 944 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 945 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 946 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 947 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 948 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 949 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 950 } 951 952 PIPE_SYNTAX_TRANSFORM_PARSERS = { 953 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 954 "AS": lambda self, query: self._build_pipe_cte( 955 query, [exp.Star()], self._parse_table_alias() 956 ), 957 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 958 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 959 "ORDER BY": lambda self, query: query.order_by( 960 self._parse_order(), append=False, copy=False 961 ), 962 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 963 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 964 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 965 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 966 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 967 } 968 969 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 970 "ALLOWED_VALUES": lambda self: self.expression( 971 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 972 ), 973 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 974 "AUTO": lambda self: self._parse_auto_property(), 975 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 976 "BACKUP": lambda self: self.expression( 977 exp.BackupProperty, this=self._parse_var(any_token=True) 978 ), 979 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 980 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 981 "CHARACTER SET": lambda 
self, **kwargs: self._parse_character_set(**kwargs), 982 "CHECKSUM": lambda self: self._parse_checksum(), 983 "CLUSTER BY": lambda self: self._parse_cluster(), 984 "CLUSTERED": lambda self: self._parse_clustered_by(), 985 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 986 exp.CollateProperty, **kwargs 987 ), 988 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 989 "CONTAINS": lambda self: self._parse_contains_property(), 990 "COPY": lambda self: self._parse_copy_property(), 991 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 992 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 993 "DEFINER": lambda self: self._parse_definer(), 994 "DETERMINISTIC": lambda self: self.expression( 995 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 996 ), 997 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 998 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 999 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 1000 "DISTKEY": lambda self: self._parse_distkey(), 1001 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1002 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1003 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1004 "ENVIRONMENT": lambda self: self.expression( 1005 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1006 ), 1007 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1008 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1009 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1010 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1011 "FREESPACE": lambda self: self._parse_freespace(), 1012 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1013 "HEAP": lambda self: self.expression(exp.HeapProperty), 1014 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1015 "IMMUTABLE": lambda self: self.expression( 1016 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1017 ), 1018 "INHERITS": lambda self: self.expression( 1019 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1020 ), 1021 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1022 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1023 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1024 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1025 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1026 "LIKE": lambda self: self._parse_create_like(), 1027 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1028 "LOCK": lambda self: self._parse_locking(), 1029 "LOCKING": lambda self: self._parse_locking(), 1030 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1031 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1032 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1033 "MODIFIES": lambda self: self._parse_modifies_property(), 1034 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1035 "NO": lambda self: self._parse_no_property(), 1036 "ON": lambda self: self._parse_on_property(), 1037 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1038 "OUTPUT": lambda self: 
self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1039 "PARTITION": lambda self: self._parse_partitioned_of(), 1040 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1041 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1042 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1043 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1044 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1045 "READS": lambda self: self._parse_reads_property(), 1046 "REMOTE": lambda self: self._parse_remote_with_connection(), 1047 "RETURNS": lambda self: self._parse_returns(), 1048 "STRICT": lambda self: self.expression(exp.StrictProperty), 1049 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1050 "ROW": lambda self: self._parse_row(), 1051 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1052 "SAMPLE": lambda self: self.expression( 1053 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1054 ), 1055 "SECURE": lambda self: self.expression(exp.SecureProperty), 1056 "SECURITY": lambda self: self._parse_security(), 1057 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1058 "SETTINGS": lambda self: self._parse_settings_property(), 1059 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1060 "SORTKEY": lambda self: self._parse_sortkey(), 1061 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1062 "STABLE": lambda self: self.expression( 1063 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1064 ), 1065 "STORED": lambda self: self._parse_stored(), 1066 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1067 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1068 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1069 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1070 "TO": lambda self: self._parse_to_table(), 1071 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1072 "TRANSFORM": lambda self: self.expression( 1073 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1074 ), 1075 "TTL": lambda self: self._parse_ttl(), 1076 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1077 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1078 "VOLATILE": lambda self: self._parse_volatile_property(), 1079 "WITH": lambda self: self._parse_with_property(), 1080 } 1081 1082 CONSTRAINT_PARSERS = { 1083 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1084 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1085 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1086 "CHARACTER SET": lambda self: self.expression( 1087 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1088 ), 1089 "CHECK": lambda self: self.expression( 1090 exp.CheckColumnConstraint, 1091 this=self._parse_wrapped(self._parse_assignment), 1092 enforced=self._match_text_seq("ENFORCED"), 1093 ), 1094 "COLLATE": lambda self: self.expression( 1095 exp.CollateColumnConstraint, 1096 this=self._parse_identifier() or self._parse_column(), 1097 ), 1098 "COMMENT": lambda self: self.expression( 1099 exp.CommentColumnConstraint, this=self._parse_string() 1100 ), 1101 "COMPRESS": lambda self: self._parse_compress(), 1102 "CLUSTERED": lambda self: self.expression( 1103 exp.ClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 1104 ), 1105 "NONCLUSTERED": lambda self: self.expression( 1106 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1107 ), 1108 "DEFAULT": lambda self: self.expression( 1109 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1110 ), 1111 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1112 "EPHEMERAL": lambda self: self.expression( 1113 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1114 ), 1115 "EXCLUDE": lambda self: self.expression( 1116 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1117 ), 1118 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1119 "FORMAT": lambda self: self.expression( 1120 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1121 ), 1122 "GENERATED": lambda self: self._parse_generated_as_identity(), 1123 "IDENTITY": lambda self: self._parse_auto_increment(), 1124 "INLINE": lambda self: self._parse_inline(), 1125 "LIKE": lambda self: self._parse_create_like(), 1126 "NOT": lambda self: self._parse_not_constraint(), 1127 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1128 "ON": lambda self: ( 1129 self._match(TokenType.UPDATE) 1130 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1131 ) 1132 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1133 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1134 "PERIOD": lambda self: self._parse_period_for_system_time(), 1135 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1136 "REFERENCES": lambda self: self._parse_references(match=False), 1137 "TITLE": lambda self: self.expression( 1138 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1139 ), 1140 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1141 "UNIQUE": lambda self: self._parse_unique(), 1142 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1143 "WATERMARK": lambda self: self.expression( 1144 exp.WatermarkColumnConstraint, 1145 this=self._match(TokenType.FOR) and self._parse_column(), 1146 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1147 ), 1148 "WITH": lambda self: self.expression( 1149 exp.Properties, expressions=self._parse_wrapped_properties() 1150 ), 1151 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1152 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1153 } 1154 1155 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1156 if not self._match(TokenType.L_PAREN, advance=False): 1157 # Partitioning by bucket or truncate follows the syntax: 1158 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1159 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1160 self._retreat(self._index - 1) 1161 return None 1162 1163 klass = ( 1164 exp.PartitionedByBucket 1165 if self._prev.text.upper() == "BUCKET" 1166 else exp.PartitionByTruncate 1167 ) 1168 1169 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1170 this, expression = seq_get(args, 0), seq_get(args, 1) 1171 1172 if isinstance(this, exp.Literal): 1173 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1174 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1175 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1176 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1177 # 1178 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1179 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1180 this, expression = expression, this 1181 1182 return self.expression(klass, this=this, expression=expression) 1183 1184 ALTER_PARSERS = { 1185 "ADD": lambda self: self._parse_alter_table_add(), 1186 "AS": lambda self: self._parse_select(), 1187 "ALTER": lambda self: self._parse_alter_table_alter(), 1188 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1189 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1190 "DROP": lambda self: self._parse_alter_table_drop(), 1191 "RENAME": lambda self: self._parse_alter_table_rename(), 1192 "SET": lambda self: self._parse_alter_table_set(), 1193 "SWAP": lambda self: self.expression( 1194 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1195 ), 1196 } 1197 1198 ALTER_ALTER_PARSERS = { 1199 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1200 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1201 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1202 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1203 } 1204 1205 SCHEMA_UNNAMED_CONSTRAINTS = { 1206 "CHECK", 1207 "EXCLUDE", 1208 "FOREIGN KEY", 1209 "LIKE", 1210 "PERIOD", 1211 "PRIMARY KEY", 1212 "UNIQUE", 1213 "WATERMARK", 1214 "BUCKET", 1215 "TRUNCATE", 1216 } 1217 1218 NO_PAREN_FUNCTION_PARSERS = { 1219 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1220 "CASE": lambda self: self._parse_case(), 1221 "CONNECT_BY_ROOT": lambda self: self.expression( 1222 exp.ConnectByRoot, this=self._parse_column() 1223 ), 1224 "IF": lambda self: self._parse_if(), 1225 } 1226 1227 INVALID_FUNC_NAME_TOKENS = { 1228 TokenType.IDENTIFIER, 1229 TokenType.STRING, 1230 } 1231 1232 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1233 1234 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1235 1236 FUNCTION_PARSERS = { 1237 **{ 1238 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1239 }, 1240 **{ 1241 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1242 }, 1243 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1244 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1245 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1246 "DECODE": lambda self: self._parse_decode(), 1247 "EXTRACT": lambda self: self._parse_extract(), 1248 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1249 "GAP_FILL": lambda self: self._parse_gap_fill(), 1250 "JSON_OBJECT": lambda self: self._parse_json_object(), 1251 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1252 "JSON_TABLE": lambda self: self._parse_json_table(), 1253 "MATCH": lambda self: self._parse_match_against(), 1254 "NORMALIZE": lambda self: self._parse_normalize(), 1255 "OPENJSON": lambda self: self._parse_open_json(), 1256 "OVERLAY": lambda self: self._parse_overlay(), 1257 "POSITION": lambda self: self._parse_position(), 1258 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1259 "STRING_AGG": lambda self: self._parse_string_agg(), 1260 "SUBSTRING": lambda self: self._parse_substring(), 1261 "TRIM": lambda self: self._parse_trim(), 1262 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1263 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1264 "XMLELEMENT": lambda self: self.expression( 1265 exp.XMLElement, 1266 this=self._match_text_seq("NAME") and self._parse_id_var(), 1267 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1268 ), 1269 "XMLTABLE": lambda self: self._parse_xml_table(), 1270 } 1271 1272 QUERY_MODIFIER_PARSERS = { 1273 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1274 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1275 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1276 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1277 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1278 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1279 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1280 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1281 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1282 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1283 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1284 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1285 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1286 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1287 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1288 TokenType.CLUSTER_BY: lambda self: ( 1289 "cluster", 1290 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1291 ), 1292 TokenType.DISTRIBUTE_BY: lambda self: ( 1293 "distribute", 1294 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1295 ), 1296 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1297 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1298 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1299 } 1300 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1301 1302 SET_PARSERS = { 1303 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1304 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1305 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1306 "TRANSACTION": lambda self: self._parse_set_transaction(), 1307 } 1308 1309 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1310 1311 TYPE_LITERAL_PARSERS = { 1312 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1313 } 1314 1315 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1316 1317 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1318 1319 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1320 1321 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1322 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1323 "ISOLATION": ( 1324 ("LEVEL", "REPEATABLE", "READ"), 1325 ("LEVEL", "READ", "COMMITTED"), 1326 ("LEVEL", "READ", "UNCOMITTED"), 1327 ("LEVEL", "SERIALIZABLE"), 1328 ), 1329 "READ": ("WRITE", "ONLY"), 1330 } 1331 1332 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1333 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1334 ) 1335 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1336 1337 CREATE_SEQUENCE: OPTIONS_TYPE = { 1338 "SCALE": ("EXTEND", "NOEXTEND"), 1339 "SHARD": ("EXTEND", "NOEXTEND"), 1340 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1341 **dict.fromkeys( 1342 ( 1343 "SESSION", 1344 "GLOBAL", 1345 "KEEP", 1346 "NOKEEP", 1347 "ORDER", 1348 "NOORDER", 1349 "NOCACHE", 1350 "CYCLE", 1351 "NOCYCLE", 1352 "NOMINVALUE", 1353 "NOMAXVALUE", 1354 "NOSCALE", 1355 "NOSHARD", 1356 ), 1357 tuple(), 1358 ), 1359 } 1360 1361 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1362 1363 USABLES: OPTIONS_TYPE = dict.fromkeys( 1364 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1365 ) 1366 1367 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1368 1369 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1370 "TYPE": ("EVOLUTION",), 1371 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1372 } 1373 1374 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1375 1376 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1377 1378 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1379 "NOT": ("ENFORCED",), 1380 "MATCH": ( 1381 "FULL", 1382 "PARTIAL", 1383 "SIMPLE", 1384 ), 1385 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1386 "USING": ( 1387 "BTREE", 1388 "HASH", 1389 ), 1390 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1391 } 1392 1393 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1394 "NO": ("OTHERS",), 1395 "CURRENT": ("ROW",), 1396 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1397 } 1398 1399 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1400 1401 CLONE_KEYWORDS = {"CLONE", "COPY"} 1402 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1403 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1404 1405 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1406 1407 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1408 1409 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1410 1411 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1412 1413 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1414 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1415 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1416 1417 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1418 1419 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1420 1421 ADD_CONSTRAINT_TOKENS = { 1422 TokenType.CONSTRAINT, 1423 TokenType.FOREIGN_KEY, 1424 TokenType.INDEX, 1425 TokenType.KEY, 1426 TokenType.PRIMARY_KEY, 1427 TokenType.UNIQUE, 1428 } 1429 1430 DISTINCT_TOKENS = {TokenType.DISTINCT} 1431 1432 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1433 1434 SELECT_START_TOKENS = {TokenType.L_PAREN, 
TokenType.WITH, TokenType.SELECT} 1435 1436 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1437 1438 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1439 1440 ODBC_DATETIME_LITERALS = { 1441 "d": exp.Date, 1442 "t": exp.Time, 1443 "ts": exp.Timestamp, 1444 } 1445 1446 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1447 1448 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1449 1450 # The style options for the DESCRIBE statement 1451 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1452 1453 # The style options for the ANALYZE statement 1454 ANALYZE_STYLES = { 1455 "BUFFER_USAGE_LIMIT", 1456 "FULL", 1457 "LOCAL", 1458 "NO_WRITE_TO_BINLOG", 1459 "SAMPLE", 1460 "SKIP_LOCKED", 1461 "VERBOSE", 1462 } 1463 1464 ANALYZE_EXPRESSION_PARSERS = { 1465 "ALL": lambda self: self._parse_analyze_columns(), 1466 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1467 "DELETE": lambda self: self._parse_analyze_delete(), 1468 "DROP": lambda self: self._parse_analyze_histogram(), 1469 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1470 "LIST": lambda self: self._parse_analyze_list(), 1471 "PREDICATE": lambda self: self._parse_analyze_columns(), 1472 "UPDATE": lambda self: self._parse_analyze_histogram(), 1473 "VALIDATE": lambda self: self._parse_analyze_validate(), 1474 } 1475 1476 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1477 1478 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1479 1480 OPERATION_MODIFIERS: t.Set[str] = set() 1481 1482 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1483 1484 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1485 1486 STRICT_CAST = True 1487 1488 PREFIXED_PIVOT_COLUMNS = False 1489 IDENTIFY_PIVOT_STRINGS = False 1490 1491 LOG_DEFAULTS_TO_LN = False 1492 1493 # Whether the table sample clause expects CSV syntax 1494 TABLESAMPLE_CSV = False 1495 1496 # The default method used for table sampling 1497 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1498 1499 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1500 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1501 1502 # Whether the TRIM function expects the characters to trim as its first argument 1503 TRIM_PATTERN_FIRST = False 1504 1505 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1506 STRING_ALIASES = False 1507 1508 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1509 MODIFIERS_ATTACHED_TO_SET_OP = True 1510 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1511 1512 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1513 NO_PAREN_IF_COMMANDS = True 1514 1515 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1516 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1517 1518 # Whether the `:` operator is used to extract a value from a VARIANT column 1519 COLON_IS_VARIANT_EXTRACT = False 1520 1521 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1522 # If this is True and '(' is not found, the keyword will be treated as an identifier 1523 VALUES_FOLLOWED_BY_PAREN = True 1524 1525 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift)
1526     SUPPORTS_IMPLICIT_UNNEST = False
1527
1528     # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
1529     INTERVAL_SPANS = True
1530
1531     # Whether a PARTITION clause can follow a table reference
1532     SUPPORTS_PARTITION_SELECTION = False
1533
1534     # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
1535     WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True
1536
1537     # Whether the 'AS' keyword is optional in the CTE definition syntax
1538     OPTIONAL_ALIAS_TOKEN_CTE = True
1539
1540     # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
1541     ALTER_RENAME_REQUIRES_COLUMN = True
1542
1543     # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
1544     # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
1545     # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
1546     # as BigQuery, where all joins have the same precedence.
1547     JOINS_HAVE_EQUAL_PRECEDENCE = False
1548
1549     # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
1550     ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False
1551
1552     # Whether map literals support arbitrary expressions as keys.
1553     # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
1554     # When False, keys are typically restricted to identifiers.
1555     MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False
1556
1557     # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
1558     # is true for Snowflake but not for BigQuery, which can also process strings
1559     JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False
1560
1561     # Dialects like Databricks support JOINs without join criteria
1562     # Adding an ON TRUE makes transpilation semantically correct for other dialects
1563     ADD_JOIN_ON_TRUE = False
1564
1565     __slots__ = (
1566         "error_level",
1567         "error_message_context",
1568         "max_errors",
1569         "dialect",
1570         "sql",
1571         "errors",
1572         "_tokens",
1573         "_index",
1574         "_curr",
1575         "_next",
1576         "_prev",
1577         "_prev_comments",
1578         "_pipe_cte_counter",
1579     )
1580
1581     # Autofilled
1582     SHOW_TRIE: t.Dict = {}
1583     SET_TRIE: t.Dict = {}
1584
1585     def __init__(
1586         self,
1587         error_level: t.Optional[ErrorLevel] = None,
1588         error_message_context: int = 100,
1589         max_errors: int = 3,
1590         dialect: DialectType = None,
1591     ):
1592         from sqlglot.dialects import Dialect
1593
1594         self.error_level = error_level or ErrorLevel.IMMEDIATE
1595         self.error_message_context = error_message_context
1596         self.max_errors = max_errors
1597         self.dialect = Dialect.get_or_raise(dialect)
1598         self.reset()
1599
1600     def reset(self):
1601         self.sql = ""
1602         self.errors = []
1603         self._tokens = []
1604         self._index = 0
1605         self._curr = None
1606         self._next = None
1607         self._prev = None
1608         self._prev_comments = None
1609         self._pipe_cte_counter = 0
1610
1611     def parse(
1612         self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
1613     ) -> t.List[t.Optional[exp.Expression]]:
1614         """
1615         Parses a list of tokens and returns a list of syntax trees, one tree
1616         per parsed SQL statement.
1617
1618         Args:
1619             raw_tokens: The list of tokens.
1620             sql: The original SQL string, used to produce helpful debug messages.
1621
1622         Returns:
1623             The list of the produced syntax trees.
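
        Example:
            An illustrative sketch, assuming the default dialect and that the
            top-level `sqlglot.tokenize` helper is available:

            >>> import sqlglot
            >>> from sqlglot.parser import Parser
            >>> sql = "SELECT 1; SELECT 2"
            >>> [e.sql() for e in Parser().parse(sqlglot.tokenize(sql), sql)]
            ['SELECT 1', 'SELECT 2']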
1624 """ 1625 return self._parse( 1626 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1627 ) 1628 1629 def parse_into( 1630 self, 1631 expression_types: exp.IntoType, 1632 raw_tokens: t.List[Token], 1633 sql: t.Optional[str] = None, 1634 ) -> t.List[t.Optional[exp.Expression]]: 1635 """ 1636 Parses a list of tokens into a given Expression type. If a collection of Expression 1637 types is given instead, this method will try to parse the token list into each one 1638 of them, stopping at the first for which the parsing succeeds. 1639 1640 Args: 1641 expression_types: The expression type(s) to try and parse the token list into. 1642 raw_tokens: The list of tokens. 1643 sql: The original SQL string, used to produce helpful debug messages. 1644 1645 Returns: 1646 The target Expression. 1647 """ 1648 errors = [] 1649 for expression_type in ensure_list(expression_types): 1650 parser = self.EXPRESSION_PARSERS.get(expression_type) 1651 if not parser: 1652 raise TypeError(f"No parser registered for {expression_type}") 1653 1654 try: 1655 return self._parse(parser, raw_tokens, sql) 1656 except ParseError as e: 1657 e.errors[0]["into_expression"] = expression_type 1658 errors.append(e) 1659 1660 raise ParseError( 1661 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1662 errors=merge_errors(errors), 1663 ) from errors[-1] 1664 1665 def _parse( 1666 self, 1667 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1668 raw_tokens: t.List[Token], 1669 sql: t.Optional[str] = None, 1670 ) -> t.List[t.Optional[exp.Expression]]: 1671 self.reset() 1672 self.sql = sql or "" 1673 1674 total = len(raw_tokens) 1675 chunks: t.List[t.List[Token]] = [[]] 1676 1677 for i, token in enumerate(raw_tokens): 1678 if token.token_type == TokenType.SEMICOLON: 1679 if token.comments: 1680 chunks.append([token]) 1681 1682 if i < total - 1: 1683 chunks.append([]) 1684 else: 1685 chunks[-1].append(token) 1686 1687 expressions = [] 1688 1689 for tokens in chunks: 1690 self._index = -1 1691 self._tokens = tokens 1692 self._advance() 1693 1694 expressions.append(parse_method(self)) 1695 1696 if self._index < len(self._tokens): 1697 self.raise_error("Invalid expression / Unexpected token") 1698 1699 self.check_errors() 1700 1701 return expressions 1702 1703 def check_errors(self) -> None: 1704 """Logs or raises any found errors, depending on the chosen error level setting.""" 1705 if self.error_level == ErrorLevel.WARN: 1706 for error in self.errors: 1707 logger.error(str(error)) 1708 elif self.error_level == ErrorLevel.RAISE and self.errors: 1709 raise ParseError( 1710 concat_messages(self.errors, self.max_errors), 1711 errors=merge_errors(self.errors), 1712 ) 1713 1714 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1715 """ 1716 Appends an error in the list of recorded errors or raises it, depending on the chosen 1717 error level setting. 1718 """ 1719 token = token or self._curr or self._prev or Token.string("") 1720 start = token.start 1721 end = token.end + 1 1722 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1723 highlight = self.sql[start:end] 1724 end_context = self.sql[end : end + self.error_message_context] 1725 1726 error = ParseError.new( 1727 f"{message}. 
Line {token.line}, Col: {token.col}.\n"
1728             f" {start_context}\033[4m{highlight}\033[0m{end_context}",
1729             description=message,
1730             line=token.line,
1731             col=token.col,
1732             start_context=start_context,
1733             highlight=highlight,
1734             end_context=end_context,
1735         )
1736
1737         if self.error_level == ErrorLevel.IMMEDIATE:
1738             raise error
1739
1740         self.errors.append(error)
1741
1742     def expression(
1743         self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
1744     ) -> E:
1745         """
1746         Creates a new, validated Expression.
1747
1748         Args:
1749             exp_class: The expression class to instantiate.
1750             comments: An optional list of comments to attach to the expression.
1751             kwargs: The arguments to set for the expression along with their respective values.
1752
1753         Returns:
1754             The target expression.
1755         """
1756         instance = exp_class(**kwargs)
1757         instance.add_comments(comments) if comments else self._add_comments(instance)
1758         return self.validate_expression(instance)
1759
1760     def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
1761         if expression and self._prev_comments:
1762             expression.add_comments(self._prev_comments)
1763             self._prev_comments = None
1764
1765     def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
1766         """
1767         Validates an Expression, making sure that all its mandatory arguments are set.
1768
1769         Args:
1770             expression: The expression to validate.
1771             args: An optional list of items that was used to instantiate the expression, if it's a Func.
1772
1773         Returns:
1774             The validated expression.
1775         """
1776         if self.error_level != ErrorLevel.IGNORE:
1777             for error_message in expression.error_messages(args):
1778                 self.raise_error(error_message)
1779
1780         return expression
1781
1782     def _find_sql(self, start: Token, end: Token) -> str:
1783         return self.sql[start.start : end.end + 1]
1784
1785     def _is_connected(self) -> bool:
1786         return self._prev and self._curr and self._prev.end + 1 == self._curr.start
1787
1788     def _advance(self, times: int = 1) -> None:
1789         self._index += times
1790         self._curr = seq_get(self._tokens, self._index)
1791         self._next = seq_get(self._tokens, self._index + 1)
1792
1793         if self._index > 0:
1794             self._prev = self._tokens[self._index - 1]
1795             self._prev_comments = self._prev.comments
1796         else:
1797             self._prev = None
1798             self._prev_comments = None
1799
1800     def _retreat(self, index: int) -> None:
1801         if index != self._index:
1802             self._advance(index - self._index)
1803
1804     def _warn_unsupported(self) -> None:
1805         if len(self._tokens) <= 1:
1806             return
1807
1808         # We use _find_sql because self.sql may comprise multiple chunks, and we're only
1809         # interested in emitting a warning for the one being currently processed.
1810         sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]
1811
1812         logger.warning(
1813             f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1814         )
1815
1816     def _parse_command(self) -> exp.Command:
1817         self._warn_unsupported()
1818         return self.expression(
1819             exp.Command,
1820             comments=self._prev_comments,
1821             this=self._prev.text.upper(),
1822             expression=self._parse_string(),
1823         )
1824
1825     def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
1826         """
1827         Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1828         This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1829         solve this by setting & resetting the parser state accordingly.
1830         """
1831         index = self._index
1832         error_level = self.error_level
1833
1834         self.error_level = ErrorLevel.IMMEDIATE
1835         try:
1836             this = parse_method()
1837         except ParseError:
1838             this = None
1839         finally:
1840             if not this or retreat:
1841                 self._retreat(index)
1842             self.error_level = error_level
1843
1844         return this
1845
1846     def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1847         start = self._prev
1848         exists = self._parse_exists() if allow_exists else None
1849
1850         self._match(TokenType.ON)
1851
1852         materialized = self._match_text_seq("MATERIALIZED")
1853         kind = self._match_set(self.CREATABLES) and self._prev
1854         if not kind:
1855             return self._parse_as_command(start)
1856
1857         if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1858             this = self._parse_user_defined_function(kind=kind.token_type)
1859         elif kind.token_type == TokenType.TABLE:
1860             this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1861         elif kind.token_type == TokenType.COLUMN:
1862             this = self._parse_column()
1863         else:
1864             this = self._parse_id_var()
1865
1866         self._match(TokenType.IS)
1867
1868         return self.expression(
1869             exp.Comment,
1870             this=this,
1871             kind=kind.text,
1872             expression=self._parse_string(),
1873             exists=exists,
1874             materialized=materialized,
1875         )
1876
1877     def _parse_to_table(
1878         self,
1879     ) -> exp.ToTableProperty:
1880         table = self._parse_table_parts(schema=True)
1881         return self.expression(exp.ToTableProperty, this=table)
1882
1883     # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1884     def _parse_ttl(self) -> exp.Expression:
1885         def _parse_ttl_action() -> t.Optional[exp.Expression]:
1886             this = self._parse_bitwise()
1887
1888             if self._match_text_seq("DELETE"):
1889                 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1890             if self._match_text_seq("RECOMPRESS"):
1891                 return self.expression(
1892                     exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1893                 )
1894             if self._match_text_seq("TO", "DISK"):
1895                 return self.expression(
1896                     exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1897                 )
1898             if self._match_text_seq("TO", "VOLUME"):
1899                 return self.expression(
1900                     exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1901                 )
1902
1903             return this
1904
1905         expressions = self._parse_csv(_parse_ttl_action)
1906         where = self._parse_where()
1907         group = self._parse_group()
1908
1909         aggregates = None
1910         if group and self._match(TokenType.SET):
1911             aggregates = self._parse_csv(self._parse_set_item)
1912
1913         return self.expression(
1914             exp.MergeTreeTTL,
1915             expressions=expressions,
1916             where=where,
1917             group=group,
1918             aggregates=aggregates,
1919         )
1920
1921     def _parse_statement(self) -> t.Optional[exp.Expression]:
1922         if self._curr is None:
1923             return None
1924
1925         if self._match_set(self.STATEMENT_PARSERS):
1926             comments = self._prev_comments
1927             stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1928             stmt.add_comments(comments, prepend=True)
1929             return stmt
1930
1931         if self._match_set(self.dialect.tokenizer_class.COMMANDS):
1932             return self._parse_command()
1933
1934         expression = self._parse_expression()
1935         expression = self._parse_set_operations(expression) if expression else self._parse_select()
1936         return
self._parse_query_modifiers(expression) 1937 1938 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1939 start = self._prev 1940 temporary = self._match(TokenType.TEMPORARY) 1941 materialized = self._match_text_seq("MATERIALIZED") 1942 1943 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1944 if not kind: 1945 return self._parse_as_command(start) 1946 1947 concurrently = self._match_text_seq("CONCURRENTLY") 1948 if_exists = exists or self._parse_exists() 1949 1950 if kind == "COLUMN": 1951 this = self._parse_column() 1952 else: 1953 this = self._parse_table_parts( 1954 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1955 ) 1956 1957 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1958 1959 if self._match(TokenType.L_PAREN, advance=False): 1960 expressions = self._parse_wrapped_csv(self._parse_types) 1961 else: 1962 expressions = None 1963 1964 return self.expression( 1965 exp.Drop, 1966 exists=if_exists, 1967 this=this, 1968 expressions=expressions, 1969 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1970 temporary=temporary, 1971 materialized=materialized, 1972 cascade=self._match_text_seq("CASCADE"), 1973 constraints=self._match_text_seq("CONSTRAINTS"), 1974 purge=self._match_text_seq("PURGE"), 1975 cluster=cluster, 1976 concurrently=concurrently, 1977 ) 1978 1979 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1980 return ( 1981 self._match_text_seq("IF") 1982 and (not not_ or self._match(TokenType.NOT)) 1983 and self._match(TokenType.EXISTS) 1984 ) 1985 1986 def _parse_create(self) -> exp.Create | exp.Command: 1987 # Note: this can't be None because we've matched a statement parser 1988 start = self._prev 1989 1990 replace = ( 1991 start.token_type == TokenType.REPLACE 1992 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1993 or self._match_pair(TokenType.OR, TokenType.ALTER) 1994 ) 1995 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1996 1997 unique = self._match(TokenType.UNIQUE) 1998 1999 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2000 clustered = True 2001 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2002 "COLUMNSTORE" 2003 ): 2004 clustered = False 2005 else: 2006 clustered = None 2007 2008 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2009 self._advance() 2010 2011 properties = None 2012 create_token = self._match_set(self.CREATABLES) and self._prev 2013 2014 if not create_token: 2015 # exp.Properties.Location.POST_CREATE 2016 properties = self._parse_properties() 2017 create_token = self._match_set(self.CREATABLES) and self._prev 2018 2019 if not properties or not create_token: 2020 return self._parse_as_command(start) 2021 2022 concurrently = self._match_text_seq("CONCURRENTLY") 2023 exists = self._parse_exists(not_=True) 2024 this = None 2025 expression: t.Optional[exp.Expression] = None 2026 indexes = None 2027 no_schema_binding = None 2028 begin = None 2029 end = None 2030 clone = None 2031 2032 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2033 nonlocal properties 2034 if properties and temp_props: 2035 properties.expressions.extend(temp_props.expressions) 2036 elif temp_props: 2037 properties = temp_props 2038 2039 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2040 this = self._parse_user_defined_function(kind=create_token.token_type) 2041 2042 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature)
2043             extend_props(self._parse_properties())
2044
2045             expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
2046             extend_props(self._parse_properties())
2047
2048             if not expression:
2049                 if self._match(TokenType.COMMAND):
2050                     expression = self._parse_as_command(self._prev)
2051                 else:
2052                     begin = self._match(TokenType.BEGIN)
2053                     return_ = self._match_text_seq("RETURN")
2054
2055                     if self._match(TokenType.STRING, advance=False):
2056                         # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
2057                         # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
2058                         expression = self._parse_string()
2059                         extend_props(self._parse_properties())
2060                     else:
2061                         expression = self._parse_user_defined_function_expression()
2062
2063                     end = self._match_text_seq("END")
2064
2065                     if return_:
2066                         expression = self.expression(exp.Return, this=expression)
2067         elif create_token.token_type == TokenType.INDEX:
2068             # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
2069             if not self._match(TokenType.ON):
2070                 index = self._parse_id_var()
2071                 anonymous = False
2072             else:
2073                 index = None
2074                 anonymous = True
2075
2076             this = self._parse_index(index=index, anonymous=anonymous)
2077         elif create_token.token_type in self.DB_CREATABLES:
2078             table_parts = self._parse_table_parts(
2079                 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
2080             )
2081
2082             # exp.Properties.Location.POST_NAME
2083             self._match(TokenType.COMMA)
2084             extend_props(self._parse_properties(before=True))
2085
2086             this = self._parse_schema(this=table_parts)
2087
2088             # exp.Properties.Location.POST_SCHEMA and POST_WITH
2089             extend_props(self._parse_properties())
2090
2091             has_alias = self._match(TokenType.ALIAS)
2092             if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
2093                 # exp.Properties.Location.POST_ALIAS
2094                 extend_props(self._parse_properties())
2095
2096             if create_token.token_type == TokenType.SEQUENCE:
2097                 expression = self._parse_types()
2098                 props = self._parse_properties()
2099                 if props:
2100                     sequence_props = exp.SequenceProperties()
2101                     options = []
2102                     for prop in props:
2103                         if isinstance(prop, exp.SequenceProperties):
2104                             for arg, value in prop.args.items():
2105                                 if arg == "options":
2106                                     options.extend(value)
2107                                 else:
2108                                     sequence_props.set(arg, value)
2109                             prop.pop()
2110
2111                     if options:
2112                         sequence_props.set("options", options)
2113
2114                     props.append("expressions", sequence_props)
2115                 extend_props(props)
2116             else:
2117                 expression = self._parse_ddl_select()
2118
2119             # Some dialects also support using a table as an alias instead of a SELECT.
2120             # Here we fall back to this as an alternative.
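            # Illustrative shape (hypothetical SQL): in CREATE TABLE t2 AS t1, the
            # part after AS is not a SELECT, so the fallback below recovers t1 via
            # _try_parse(self._parse_table_parts) instead of leaving `expression` unset.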
2121 if not expression and has_alias: 2122 expression = self._try_parse(self._parse_table_parts) 2123 2124 if create_token.token_type == TokenType.TABLE: 2125 # exp.Properties.Location.POST_EXPRESSION 2126 extend_props(self._parse_properties()) 2127 2128 indexes = [] 2129 while True: 2130 index = self._parse_index() 2131 2132 # exp.Properties.Location.POST_INDEX 2133 extend_props(self._parse_properties()) 2134 if not index: 2135 break 2136 else: 2137 self._match(TokenType.COMMA) 2138 indexes.append(index) 2139 elif create_token.token_type == TokenType.VIEW: 2140 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2141 no_schema_binding = True 2142 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2143 extend_props(self._parse_properties()) 2144 2145 shallow = self._match_text_seq("SHALLOW") 2146 2147 if self._match_texts(self.CLONE_KEYWORDS): 2148 copy = self._prev.text.lower() == "copy" 2149 clone = self.expression( 2150 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2151 ) 2152 2153 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2154 return self._parse_as_command(start) 2155 2156 create_kind_text = create_token.text.upper() 2157 return self.expression( 2158 exp.Create, 2159 this=this, 2160 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2161 replace=replace, 2162 refresh=refresh, 2163 unique=unique, 2164 expression=expression, 2165 exists=exists, 2166 properties=properties, 2167 indexes=indexes, 2168 no_schema_binding=no_schema_binding, 2169 begin=begin, 2170 end=end, 2171 clone=clone, 2172 concurrently=concurrently, 2173 clustered=clustered, 2174 ) 2175 2176 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2177 seq = exp.SequenceProperties() 2178 2179 options = [] 2180 index = self._index 2181 2182 while self._curr: 2183 self._match(TokenType.COMMA) 2184 if self._match_text_seq("INCREMENT"): 2185 self._match_text_seq("BY") 2186 self._match_text_seq("=") 2187 seq.set("increment", self._parse_term()) 2188 elif self._match_text_seq("MINVALUE"): 2189 seq.set("minvalue", self._parse_term()) 2190 elif self._match_text_seq("MAXVALUE"): 2191 seq.set("maxvalue", self._parse_term()) 2192 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2193 self._match_text_seq("=") 2194 seq.set("start", self._parse_term()) 2195 elif self._match_text_seq("CACHE"): 2196 # T-SQL allows empty CACHE which is initialized dynamically 2197 seq.set("cache", self._parse_number() or True) 2198 elif self._match_text_seq("OWNED", "BY"): 2199 # "OWNED BY NONE" is the default 2200 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2201 else: 2202 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2203 if opt: 2204 options.append(opt) 2205 else: 2206 break 2207 2208 seq.set("options", options if options else None) 2209 return None if self._index == index else seq 2210 2211 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2212 # only used for teradata currently 2213 self._match(TokenType.COMMA) 2214 2215 kwargs = { 2216 "no": self._match_text_seq("NO"), 2217 "dual": self._match_text_seq("DUAL"), 2218 "before": self._match_text_seq("BEFORE"), 2219 "default": self._match_text_seq("DEFAULT"), 2220 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2221 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2222 "after": self._match_text_seq("AFTER"), 2223 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2224 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2225 } 2226 2227 if self._match_texts(self.PROPERTY_PARSERS): 2228 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2229 try: 2230 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2231 except TypeError: 2232 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2233 2234 return None 2235 2236 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2237 return self._parse_wrapped_csv(self._parse_property) 2238 2239 def _parse_property(self) -> t.Optional[exp.Expression]: 2240 if self._match_texts(self.PROPERTY_PARSERS): 2241 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2242 2243 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2244 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2245 2246 if self._match_text_seq("COMPOUND", "SORTKEY"): 2247 return self._parse_sortkey(compound=True) 2248 2249 if self._match_text_seq("SQL", "SECURITY"): 2250 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2251 2252 index = self._index 2253 2254 seq_props = self._parse_sequence_properties() 2255 if seq_props: 2256 return seq_props 2257 2258 self._retreat(index) 2259 key = self._parse_column() 2260 2261 if not self._match(TokenType.EQ): 2262 self._retreat(index) 2263 return None 2264 2265 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2266 if isinstance(key, exp.Column): 2267 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2268 2269 value = self._parse_bitwise() or self._parse_var(any_token=True) 2270 2271 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2272 if isinstance(value, exp.Column): 2273 value = exp.var(value.name) 2274 2275 return self.expression(exp.Property, this=key, value=value) 2276 2277 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2278 if self._match_text_seq("BY"): 2279 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2280 2281 self._match(TokenType.ALIAS) 2282 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2283 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2284 2285 return self.expression( 2286 exp.FileFormatProperty, 2287 this=( 2288 self.expression( 2289 exp.InputOutputFormat, 2290 input_format=input_format, 2291 output_format=output_format, 2292 ) 2293 if input_format or output_format 2294 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2295 ), 2296 hive_format=True, 2297 ) 2298 2299 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2300 field = self._parse_field() 2301 if isinstance(field, exp.Identifier) and not field.quoted: 2302 field = exp.var(field) 2303 2304 return field 2305 2306 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2307 self._match(TokenType.EQ) 2308 self._match(TokenType.ALIAS) 2309 2310 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2311 2312 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2313 properties = [] 2314 while True: 2315 if before: 2316 prop = self._parse_property_before() 2317 else: 2318 prop = self._parse_property() 2319 if not prop: 2320 break 2321 for p in ensure_list(prop): 2322 properties.append(p) 
2323 2324 if properties: 2325 return self.expression(exp.Properties, expressions=properties) 2326 2327 return None 2328 2329 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2330 return self.expression( 2331 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2332 ) 2333 2334 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2335 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2336 security_specifier = self._prev.text.upper() 2337 return self.expression(exp.SecurityProperty, this=security_specifier) 2338 return None 2339 2340 def _parse_settings_property(self) -> exp.SettingsProperty: 2341 return self.expression( 2342 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2343 ) 2344 2345 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2346 if self._index >= 2: 2347 pre_volatile_token = self._tokens[self._index - 2] 2348 else: 2349 pre_volatile_token = None 2350 2351 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2352 return exp.VolatileProperty() 2353 2354 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2355 2356 def _parse_retention_period(self) -> exp.Var: 2357 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2358 number = self._parse_number() 2359 number_str = f"{number} " if number else "" 2360 unit = self._parse_var(any_token=True) 2361 return exp.var(f"{number_str}{unit}") 2362 2363 def _parse_system_versioning_property( 2364 self, with_: bool = False 2365 ) -> exp.WithSystemVersioningProperty: 2366 self._match(TokenType.EQ) 2367 prop = self.expression( 2368 exp.WithSystemVersioningProperty, 2369 **{ # type: ignore 2370 "on": True, 2371 "with": with_, 2372 }, 2373 ) 2374 2375 if self._match_text_seq("OFF"): 2376 prop.set("on", False) 2377 return prop 2378 2379 self._match(TokenType.ON) 2380 if self._match(TokenType.L_PAREN): 2381 while self._curr and not self._match(TokenType.R_PAREN): 2382 if self._match_text_seq("HISTORY_TABLE", "="): 2383 prop.set("this", self._parse_table_parts()) 2384 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2385 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2386 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2387 prop.set("retention_period", self._parse_retention_period()) 2388 2389 self._match(TokenType.COMMA) 2390 2391 return prop 2392 2393 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2394 self._match(TokenType.EQ) 2395 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2396 prop = self.expression(exp.DataDeletionProperty, on=on) 2397 2398 if self._match(TokenType.L_PAREN): 2399 while self._curr and not self._match(TokenType.R_PAREN): 2400 if self._match_text_seq("FILTER_COLUMN", "="): 2401 prop.set("filter_column", self._parse_column()) 2402 elif self._match_text_seq("RETENTION_PERIOD", "="): 2403 prop.set("retention_period", self._parse_retention_period()) 2404 2405 self._match(TokenType.COMMA) 2406 2407 return prop 2408 2409 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2410 kind = "HASH" 2411 expressions: t.Optional[t.List[exp.Expression]] = None 2412 if self._match_text_seq("BY", "HASH"): 2413 expressions = self._parse_wrapped_csv(self._parse_id_var) 2414 elif self._match_text_seq("BY", "RANDOM"): 2415 kind = "RANDOM" 2416 2417 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2418 
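        # Illustrative inputs (Doris/StarRocks-style syntax, assumed here):
        #   DISTRIBUTED BY HASH (user_id) BUCKETS 10  -> kind="HASH", buckets=10
        #   DISTRIBUTED BY RANDOM BUCKETS AUTO        -> kind="RANDOM", buckets=None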
buckets: t.Optional[exp.Expression] = None 2419 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2420 buckets = self._parse_number() 2421 2422 return self.expression( 2423 exp.DistributedByProperty, 2424 expressions=expressions, 2425 kind=kind, 2426 buckets=buckets, 2427 order=self._parse_order(), 2428 ) 2429 2430 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2431 self._match_text_seq("KEY") 2432 expressions = self._parse_wrapped_id_vars() 2433 return self.expression(expr_type, expressions=expressions) 2434 2435 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2436 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2437 prop = self._parse_system_versioning_property(with_=True) 2438 self._match_r_paren() 2439 return prop 2440 2441 if self._match(TokenType.L_PAREN, advance=False): 2442 return self._parse_wrapped_properties() 2443 2444 if self._match_text_seq("JOURNAL"): 2445 return self._parse_withjournaltable() 2446 2447 if self._match_texts(self.VIEW_ATTRIBUTES): 2448 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2449 2450 if self._match_text_seq("DATA"): 2451 return self._parse_withdata(no=False) 2452 elif self._match_text_seq("NO", "DATA"): 2453 return self._parse_withdata(no=True) 2454 2455 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2456 return self._parse_serde_properties(with_=True) 2457 2458 if self._match(TokenType.SCHEMA): 2459 return self.expression( 2460 exp.WithSchemaBindingProperty, 2461 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2462 ) 2463 2464 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2465 return self.expression( 2466 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2467 ) 2468 2469 if not self._next: 2470 return None 2471 2472 return self._parse_withisolatedloading() 2473 2474 def _parse_procedure_option(self) -> exp.Expression | None: 2475 if self._match_text_seq("EXECUTE", "AS"): 2476 return self.expression( 2477 exp.ExecuteAsProperty, 2478 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2479 or self._parse_string(), 2480 ) 2481 2482 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2483 2484 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2485 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2486 self._match(TokenType.EQ) 2487 2488 user = self._parse_id_var() 2489 self._match(TokenType.PARAMETER) 2490 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2491 2492 if not user or not host: 2493 return None 2494 2495 return exp.DefinerProperty(this=f"{user}@{host}") 2496 2497 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2498 self._match(TokenType.TABLE) 2499 self._match(TokenType.EQ) 2500 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2501 2502 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2503 return self.expression(exp.LogProperty, no=no) 2504 2505 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2506 return self.expression(exp.JournalProperty, **kwargs) 2507 2508 def _parse_checksum(self) -> exp.ChecksumProperty: 2509 self._match(TokenType.EQ) 2510 2511 on = None 2512 if self._match(TokenType.ON): 2513 on = True 2514 elif self._match_text_seq("OFF"): 2515 on = False 2516 2517 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2518 2519 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2520 return self.expression( 2521 exp.Cluster, 2522 expressions=( 2523 self._parse_wrapped_csv(self._parse_ordered) 2524 if wrapped 2525 else self._parse_csv(self._parse_ordered) 2526 ), 2527 ) 2528 2529 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2530 self._match_text_seq("BY") 2531 2532 self._match_l_paren() 2533 expressions = self._parse_csv(self._parse_column) 2534 self._match_r_paren() 2535 2536 if self._match_text_seq("SORTED", "BY"): 2537 self._match_l_paren() 2538 sorted_by = self._parse_csv(self._parse_ordered) 2539 self._match_r_paren() 2540 else: 2541 sorted_by = None 2542 2543 self._match(TokenType.INTO) 2544 buckets = self._parse_number() 2545 self._match_text_seq("BUCKETS") 2546 2547 return self.expression( 2548 exp.ClusteredByProperty, 2549 expressions=expressions, 2550 sorted_by=sorted_by, 2551 buckets=buckets, 2552 ) 2553 2554 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2555 if not self._match_text_seq("GRANTS"): 2556 self._retreat(self._index - 1) 2557 return None 2558 2559 return self.expression(exp.CopyGrantsProperty) 2560 2561 def _parse_freespace(self) -> exp.FreespaceProperty: 2562 self._match(TokenType.EQ) 2563 return self.expression( 2564 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2565 ) 2566 2567 def _parse_mergeblockratio( 2568 self, no: bool = False, default: bool = False 2569 ) -> exp.MergeBlockRatioProperty: 2570 if self._match(TokenType.EQ): 2571 return self.expression( 2572 exp.MergeBlockRatioProperty, 2573 this=self._parse_number(), 2574 percent=self._match(TokenType.PERCENT), 2575 ) 2576 2577 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2578 2579 def _parse_datablocksize( 2580 self, 2581 default: t.Optional[bool] = None, 2582 minimum: t.Optional[bool] = None, 2583 maximum: t.Optional[bool] = None, 2584 ) -> exp.DataBlocksizeProperty: 2585 self._match(TokenType.EQ) 2586 size = self._parse_number() 2587 2588 units = None 2589 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2590 units = self._prev.text 2591 2592 return self.expression( 2593 exp.DataBlocksizeProperty, 2594 size=size, 2595 units=units, 2596 default=default, 2597 minimum=minimum, 2598 maximum=maximum, 2599 ) 2600 2601 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2602 self._match(TokenType.EQ) 2603 always = self._match_text_seq("ALWAYS") 2604 manual = self._match_text_seq("MANUAL") 2605 never = self._match_text_seq("NEVER") 2606 default = self._match_text_seq("DEFAULT") 2607 2608 autotemp = None 2609 if self._match_text_seq("AUTOTEMP"): 2610 autotemp = self._parse_schema() 2611 2612 return self.expression( 2613 exp.BlockCompressionProperty, 2614 always=always, 2615 manual=manual, 2616 never=never, 2617 default=default, 2618 autotemp=autotemp, 2619 ) 2620 2621 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2622 index = self._index 2623 no = self._match_text_seq("NO") 2624 concurrent = self._match_text_seq("CONCURRENT") 2625 2626 if not self._match_text_seq("ISOLATED", "LOADING"): 2627 self._retreat(index) 2628 return None 2629 2630 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2631 return self.expression( 2632 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2633 ) 2634 2635 def _parse_locking(self) -> exp.LockingProperty: 2636 if self._match(TokenType.TABLE): 2637 kind = "TABLE" 2638 elif 
self._match(TokenType.VIEW): 2639 kind = "VIEW" 2640 elif self._match(TokenType.ROW): 2641 kind = "ROW" 2642 elif self._match_text_seq("DATABASE"): 2643 kind = "DATABASE" 2644 else: 2645 kind = None 2646 2647 if kind in ("DATABASE", "TABLE", "VIEW"): 2648 this = self._parse_table_parts() 2649 else: 2650 this = None 2651 2652 if self._match(TokenType.FOR): 2653 for_or_in = "FOR" 2654 elif self._match(TokenType.IN): 2655 for_or_in = "IN" 2656 else: 2657 for_or_in = None 2658 2659 if self._match_text_seq("ACCESS"): 2660 lock_type = "ACCESS" 2661 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2662 lock_type = "EXCLUSIVE" 2663 elif self._match_text_seq("SHARE"): 2664 lock_type = "SHARE" 2665 elif self._match_text_seq("READ"): 2666 lock_type = "READ" 2667 elif self._match_text_seq("WRITE"): 2668 lock_type = "WRITE" 2669 elif self._match_text_seq("CHECKSUM"): 2670 lock_type = "CHECKSUM" 2671 else: 2672 lock_type = None 2673 2674 override = self._match_text_seq("OVERRIDE") 2675 2676 return self.expression( 2677 exp.LockingProperty, 2678 this=this, 2679 kind=kind, 2680 for_or_in=for_or_in, 2681 lock_type=lock_type, 2682 override=override, 2683 ) 2684 2685 def _parse_partition_by(self) -> t.List[exp.Expression]: 2686 if self._match(TokenType.PARTITION_BY): 2687 return self._parse_csv(self._parse_assignment) 2688 return [] 2689 2690 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2691 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2692 if self._match_text_seq("MINVALUE"): 2693 return exp.var("MINVALUE") 2694 if self._match_text_seq("MAXVALUE"): 2695 return exp.var("MAXVALUE") 2696 return self._parse_bitwise() 2697 2698 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2699 expression = None 2700 from_expressions = None 2701 to_expressions = None 2702 2703 if self._match(TokenType.IN): 2704 this = self._parse_wrapped_csv(self._parse_bitwise) 2705 elif self._match(TokenType.FROM): 2706 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2707 self._match_text_seq("TO") 2708 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2709 elif self._match_text_seq("WITH", "(", "MODULUS"): 2710 this = self._parse_number() 2711 self._match_text_seq(",", "REMAINDER") 2712 expression = self._parse_number() 2713 self._match_r_paren() 2714 else: 2715 self.raise_error("Failed to parse partition bound spec.") 2716 2717 return self.expression( 2718 exp.PartitionBoundSpec, 2719 this=this, 2720 expression=expression, 2721 from_expressions=from_expressions, 2722 to_expressions=to_expressions, 2723 ) 2724 2725 # https://www.postgresql.org/docs/current/sql-createtable.html 2726 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2727 if not self._match_text_seq("OF"): 2728 self._retreat(self._index - 1) 2729 return None 2730 2731 this = self._parse_table(schema=True) 2732 2733 if self._match(TokenType.DEFAULT): 2734 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2735 elif self._match_text_seq("FOR", "VALUES"): 2736 expression = self._parse_partition_bound_spec() 2737 else: 2738 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2739 2740 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2741 2742 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2743 self._match(TokenType.EQ) 2744 return self.expression( 2745 exp.PartitionedByProperty, 2746 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2747 ) 2748 2749 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2750 if self._match_text_seq("AND", "STATISTICS"): 2751 statistics = True 2752 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2753 statistics = False 2754 else: 2755 statistics = None 2756 2757 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2758 2759 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2760 if self._match_text_seq("SQL"): 2761 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2762 return None 2763 2764 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2765 if self._match_text_seq("SQL", "DATA"): 2766 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2767 return None 2768 2769 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2770 if self._match_text_seq("PRIMARY", "INDEX"): 2771 return exp.NoPrimaryIndexProperty() 2772 if self._match_text_seq("SQL"): 2773 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2774 return None 2775 2776 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2777 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2778 return exp.OnCommitProperty() 2779 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2780 return exp.OnCommitProperty(delete=True) 2781 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2782 2783 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2784 if self._match_text_seq("SQL", "DATA"): 2785 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2786 return None 2787 2788 def _parse_distkey(self) -> exp.DistKeyProperty: 2789 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2790 2791 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2792 table = self._parse_table(schema=True) 2793 2794 options = [] 2795 while self._match_texts(("INCLUDING", "EXCLUDING")): 2796 this = self._prev.text.upper() 2797 2798 id_var = self._parse_id_var() 2799 if not id_var: 2800 return None 2801 2802 options.append( 2803 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2804 ) 2805 2806 return self.expression(exp.LikeProperty, this=table, expressions=options) 2807 2808 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2809 return self.expression( 2810 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2811 ) 2812 2813 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2814 self._match(TokenType.EQ) 2815 return self.expression( 2816 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2817 ) 2818 2819 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2820 self._match_text_seq("WITH", "CONNECTION") 2821 return self.expression( 2822 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2823 ) 2824 2825 def _parse_returns(self) -> exp.ReturnsProperty: 2826 value: t.Optional[exp.Expression] 2827 null = None 2828 is_table = self._match(TokenType.TABLE) 2829 2830 if is_table: 2831 if self._match(TokenType.LT): 2832 value = self.expression( 2833 exp.Schema, 2834 this="TABLE", 2835 expressions=self._parse_csv(self._parse_struct_types), 2836 ) 2837 if not self._match(TokenType.GT): 2838 self.raise_error("Expecting >") 2839 else: 2840 value = self._parse_schema(exp.var("TABLE")) 2841 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
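            # SQL-standard routine characteristic RETURNS NULL ON NULL INPUT; the
            # RETURNS token itself was consumed before this method was called, e.g.
            # (illustrative): CREATE FUNCTION f(x INT) RETURNS INT RETURNS NULL ON NULL INPUT ...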
2842 null = True 2843 value = None 2844 else: 2845 value = self._parse_types() 2846 2847 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2848 2849 def _parse_describe(self) -> exp.Describe: 2850 kind = self._match_set(self.CREATABLES) and self._prev.text 2851 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2852 if self._match(TokenType.DOT): 2853 style = None 2854 self._retreat(self._index - 2) 2855 2856 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2857 2858 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2859 this = self._parse_statement() 2860 else: 2861 this = self._parse_table(schema=True) 2862 2863 properties = self._parse_properties() 2864 expressions = properties.expressions if properties else None 2865 partition = self._parse_partition() 2866 return self.expression( 2867 exp.Describe, 2868 this=this, 2869 style=style, 2870 kind=kind, 2871 expressions=expressions, 2872 partition=partition, 2873 format=format, 2874 ) 2875 2876 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2877 kind = self._prev.text.upper() 2878 expressions = [] 2879 2880 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2881 if self._match(TokenType.WHEN): 2882 expression = self._parse_disjunction() 2883 self._match(TokenType.THEN) 2884 else: 2885 expression = None 2886 2887 else_ = self._match(TokenType.ELSE) 2888 2889 if not self._match(TokenType.INTO): 2890 return None 2891 2892 return self.expression( 2893 exp.ConditionalInsert, 2894 this=self.expression( 2895 exp.Insert, 2896 this=self._parse_table(schema=True), 2897 expression=self._parse_derived_table_values(), 2898 ), 2899 expression=expression, 2900 else_=else_, 2901 ) 2902 2903 expression = parse_conditional_insert() 2904 while expression is not None: 2905 expressions.append(expression) 2906 expression = parse_conditional_insert() 2907 2908 return self.expression( 2909 exp.MultitableInserts, 2910 kind=kind, 2911 comments=comments, 2912 expressions=expressions, 2913 source=self._parse_table(), 2914 ) 2915 2916 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2917 comments = [] 2918 hint = self._parse_hint() 2919 overwrite = self._match(TokenType.OVERWRITE) 2920 ignore = self._match(TokenType.IGNORE) 2921 local = self._match_text_seq("LOCAL") 2922 alternative = None 2923 is_function = None 2924 2925 if self._match_text_seq("DIRECTORY"): 2926 this: t.Optional[exp.Expression] = self.expression( 2927 exp.Directory, 2928 this=self._parse_var_or_string(), 2929 local=local, 2930 row_format=self._parse_row_format(match_row=True), 2931 ) 2932 else: 2933 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2934 comments += ensure_list(self._prev_comments) 2935 return self._parse_multitable_inserts(comments) 2936 2937 if self._match(TokenType.OR): 2938 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2939 2940 self._match(TokenType.INTO) 2941 comments += ensure_list(self._prev_comments) 2942 self._match(TokenType.TABLE) 2943 is_function = self._match(TokenType.FUNCTION) 2944 2945 this = ( 2946 self._parse_table(schema=True, parse_partition=True) 2947 if not is_function 2948 else self._parse_function() 2949 ) 2950 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2951 this.set("alias", self._parse_table_alias()) 2952 2953 returning = self._parse_returning() 2954 2955 return self.expression( 2956 
exp.Insert, 2957 comments=comments, 2958 hint=hint, 2959 is_function=is_function, 2960 this=this, 2961 stored=self._match_text_seq("STORED") and self._parse_stored(), 2962 by_name=self._match_text_seq("BY", "NAME"), 2963 exists=self._parse_exists(), 2964 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2965 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2966 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2967 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2968 conflict=self._parse_on_conflict(), 2969 returning=returning or self._parse_returning(), 2970 overwrite=overwrite, 2971 alternative=alternative, 2972 ignore=ignore, 2973 source=self._match(TokenType.TABLE) and self._parse_table(), 2974 ) 2975 2976 def _parse_kill(self) -> exp.Kill: 2977 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2978 2979 return self.expression( 2980 exp.Kill, 2981 this=self._parse_primary(), 2982 kind=kind, 2983 ) 2984 2985 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2986 conflict = self._match_text_seq("ON", "CONFLICT") 2987 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2988 2989 if not conflict and not duplicate: 2990 return None 2991 2992 conflict_keys = None 2993 constraint = None 2994 2995 if conflict: 2996 if self._match_text_seq("ON", "CONSTRAINT"): 2997 constraint = self._parse_id_var() 2998 elif self._match(TokenType.L_PAREN): 2999 conflict_keys = self._parse_csv(self._parse_id_var) 3000 self._match_r_paren() 3001 3002 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3003 if self._prev.token_type == TokenType.UPDATE: 3004 self._match(TokenType.SET) 3005 expressions = self._parse_csv(self._parse_equality) 3006 else: 3007 expressions = None 3008 3009 return self.expression( 3010 exp.OnConflict, 3011 duplicate=duplicate, 3012 expressions=expressions, 3013 action=action, 3014 conflict_keys=conflict_keys, 3015 constraint=constraint, 3016 where=self._parse_where(), 3017 ) 3018 3019 def _parse_returning(self) -> t.Optional[exp.Returning]: 3020 if not self._match(TokenType.RETURNING): 3021 return None 3022 return self.expression( 3023 exp.Returning, 3024 expressions=self._parse_csv(self._parse_expression), 3025 into=self._match(TokenType.INTO) and self._parse_table_part(), 3026 ) 3027 3028 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3029 if not self._match(TokenType.FORMAT): 3030 return None 3031 return self._parse_row_format() 3032 3033 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3034 index = self._index 3035 with_ = with_ or self._match_text_seq("WITH") 3036 3037 if not self._match(TokenType.SERDE_PROPERTIES): 3038 self._retreat(index) 3039 return None 3040 return self.expression( 3041 exp.SerdeProperties, 3042 **{ # type: ignore 3043 "expressions": self._parse_wrapped_properties(), 3044 "with": with_, 3045 }, 3046 ) 3047 3048 def _parse_row_format( 3049 self, match_row: bool = False 3050 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3051 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3052 return None 3053 3054 if self._match_text_seq("SERDE"): 3055 this = self._parse_string() 3056 3057 serde_properties = self._parse_serde_properties() 3058 3059 return self.expression( 3060 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3061 ) 3062 3063 self._match_text_seq("DELIMITED") 3064 3065 kwargs = {} 3066 3067 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3068 kwargs["fields"] = self._parse_string() 3069 if self._match_text_seq("ESCAPED", "BY"): 3070 kwargs["escaped"] = self._parse_string() 3071 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3072 kwargs["collection_items"] = self._parse_string() 3073 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3074 kwargs["map_keys"] = self._parse_string() 3075 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3076 kwargs["lines"] = self._parse_string() 3077 if self._match_text_seq("NULL", "DEFINED", "AS"): 3078 kwargs["null"] = self._parse_string() 3079 3080 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3081 3082 def _parse_load(self) -> exp.LoadData | exp.Command: 3083 if self._match_text_seq("DATA"): 3084 local = self._match_text_seq("LOCAL") 3085 self._match_text_seq("INPATH") 3086 inpath = self._parse_string() 3087 overwrite = self._match(TokenType.OVERWRITE) 3088 self._match_pair(TokenType.INTO, TokenType.TABLE) 3089 3090 return self.expression( 3091 exp.LoadData, 3092 this=self._parse_table(schema=True), 3093 local=local, 3094 overwrite=overwrite, 3095 inpath=inpath, 3096 partition=self._parse_partition(), 3097 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3098 serde=self._match_text_seq("SERDE") and self._parse_string(), 3099 ) 3100 return self._parse_as_command(self._prev) 3101 3102 def _parse_delete(self) -> exp.Delete: 3103 # This handles MySQL's "Multiple-Table Syntax" 3104 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3105 tables = None 3106 if not self._match(TokenType.FROM, advance=False): 3107 tables = self._parse_csv(self._parse_table) or None 3108 3109 returning = self._parse_returning() 3110 3111 return self.expression( 3112 exp.Delete, 3113 tables=tables, 3114 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3115 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3116 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3117 where=self._parse_where(), 3118 returning=returning or self._parse_returning(), 3119 limit=self._parse_limit(), 3120 ) 3121 3122 def _parse_update(self) -> exp.Update: 3123 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3124 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3125 returning = self._parse_returning() 3126 return self.expression( 3127 exp.Update, 3128 **{ # type: ignore 3129 "this": this, 3130 "expressions": expressions, 3131 "from": self._parse_from(joins=True), 3132 "where": self._parse_where(), 3133 "returning": returning or self._parse_returning(), 3134 "order": self._parse_order(), 3135 "limit": self._parse_limit(), 3136 }, 3137 ) 3138 3139 def _parse_use(self) -> exp.Use: 3140 return self.expression( 3141 exp.Use, 3142 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3143 this=self._parse_table(schema=False), 3144 ) 3145 3146 def _parse_uncache(self) -> exp.Uncache: 3147 if not self._match(TokenType.TABLE): 3148 self.raise_error("Expecting TABLE after UNCACHE") 3149 3150 return self.expression( 3151 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3152 ) 3153 3154 def _parse_cache(self) -> exp.Cache: 3155 lazy = self._match_text_seq("LAZY") 3156 self._match(TokenType.TABLE) 3157 table = 
    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None
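
    # Editor's illustration (not part of sqlglot/parser.py): _parse_value wraps each
    # VALUES row in an exp.Tuple. A minimal sketch assuming sqlglot is installed.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT * FROM (VALUES (1, 'a'), (2, 'b')) AS v(x, y)")
    #     >>> values = q.args["from"].this
    #     # an exp.Values whose "expressions" are two exp.Tuple rows, with the
    #     # v(x, y) alias attached as an exp.TableAlias
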
    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select(from_=from_)
            if select:
                if not select.args.get("from"):
                    select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into an exp.Table so it passes through
        # _parse_query_modifiers, in case a modifier (e.g. a join) follows
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: t.Optional[exp.From] = None,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            if not query and from_:
                query = exp.select("*").from_(from_)
            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query
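
    # Editor's illustration (not part of sqlglot/parser.py): the FROM branch of
    # _parse_wrapped_select expands a bare, parenthesized FROM-first query into
    # SELECT * over that FROM. A sketch assuming duckdb's FROM-first syntax.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT * FROM (FROM t) AS s", dialect="duckdb")
    #     # the inner (FROM t) should come back as the equivalent of SELECT * FROM t,
    #     # wrapped in a subquery aliased s
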
    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports a leading FROM clause, e.g. FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
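
    # Editor's illustration (not part of sqlglot/parser.py): _parse_with collects
    # each CTE into an exp.With attached under the statement's "with" arg; note the
    # normalization above that wraps a VALUES-bodied CTE in SELECT * FROM ... .
    # A minimal sketch assuming sqlglot is installed.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #     >>> q.args["with"].expressions[0]  # an exp.CTE whose alias is "c"
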
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and, if that fails, treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
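
    # Editor's illustration (not part of sqlglot/parser.py): _implicit_unnests_to_explicit
    # is invoked from _parse_query_modifiers (below) when the dialect sets
    # SUPPORTS_IMPLICIT_UNNEST, as BigQuery does. A hedged sketch:
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT a FROM t, t.arr", dialect="bigquery")
    #     # the comma join over the path t.arr, which is rooted at a known FROM
    #     # reference, should be rewritten into an explicit exp.Unnest join
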
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )
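
    # Editor's illustration (not part of sqlglot/parser.py): _parse_query_modifiers
    # routes each trailing clause through QUERY_MODIFIER_PARSERS into its own arg
    # slot, and rejects duplicates. A minimal sketch assuming sqlglot is installed.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT a FROM t WHERE a > 0 GROUP BY a ORDER BY a LIMIT 1")
    #     >>> sorted(k for k in ("where", "group", "order", "limit") if q.args.get(k))
    #     ['group', 'limit', 'order', 'where']
    #     # repeating a clause (e.g. two WHEREs) raises "Found multiple 'WHERE' clauses"
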
MATCH SKIP" 3683 if self._match_text_seq("PAST", "LAST", "ROW"): 3684 text += " PAST LAST ROW" 3685 elif self._match_text_seq("TO", "NEXT", "ROW"): 3686 text += " TO NEXT ROW" 3687 elif self._match_text_seq("TO", "FIRST"): 3688 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3689 elif self._match_text_seq("TO", "LAST"): 3690 text += f" TO LAST {self._advance_any().text}" # type: ignore 3691 after = exp.var(text) 3692 else: 3693 after = None 3694 3695 if self._match_text_seq("PATTERN"): 3696 self._match_l_paren() 3697 3698 if not self._curr: 3699 self.raise_error("Expecting )", self._curr) 3700 3701 paren = 1 3702 start = self._curr 3703 3704 while self._curr and paren > 0: 3705 if self._curr.token_type == TokenType.L_PAREN: 3706 paren += 1 3707 if self._curr.token_type == TokenType.R_PAREN: 3708 paren -= 1 3709 3710 end = self._prev 3711 self._advance() 3712 3713 if paren > 0: 3714 self.raise_error("Expecting )", self._curr) 3715 3716 pattern = exp.var(self._find_sql(start, end)) 3717 else: 3718 pattern = None 3719 3720 define = ( 3721 self._parse_csv(self._parse_name_as_expression) 3722 if self._match_text_seq("DEFINE") 3723 else None 3724 ) 3725 3726 self._match_r_paren() 3727 3728 return self.expression( 3729 exp.MatchRecognize, 3730 partition_by=partition, 3731 order=order, 3732 measures=measures, 3733 rows=rows, 3734 after=after, 3735 pattern=pattern, 3736 define=define, 3737 alias=self._parse_table_alias(), 3738 ) 3739 3740 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3741 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3742 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3743 cross_apply = False 3744 3745 if cross_apply is not None: 3746 this = self._parse_select(table=True) 3747 view = None 3748 outer = None 3749 elif self._match(TokenType.LATERAL): 3750 this = self._parse_select(table=True) 3751 view = self._match(TokenType.VIEW) 3752 outer = self._match(TokenType.OUTER) 3753 else: 3754 return None 3755 3756 if not this: 3757 this = ( 3758 self._parse_unnest() 3759 or self._parse_function() 3760 or self._parse_id_var(any_token=False) 3761 ) 3762 3763 while self._match(TokenType.DOT): 3764 this = exp.Dot( 3765 this=this, 3766 expression=self._parse_function() or self._parse_id_var(any_token=False), 3767 ) 3768 3769 ordinality: t.Optional[bool] = None 3770 3771 if view: 3772 table = self._parse_id_var(any_token=False) 3773 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3774 table_alias: t.Optional[exp.TableAlias] = self.expression( 3775 exp.TableAlias, this=table, columns=columns 3776 ) 3777 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3778 # We move the alias from the lateral's child node to the lateral itself 3779 table_alias = this.args["alias"].pop() 3780 else: 3781 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3782 table_alias = self._parse_table_alias() 3783 3784 return self.expression( 3785 exp.Lateral, 3786 this=this, 3787 view=view, 3788 outer=outer, 3789 alias=table_alias, 3790 cross_apply=cross_apply, 3791 ordinality=ordinality, 3792 ) 3793 3794 def _parse_join_parts( 3795 self, 3796 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3797 return ( 3798 self._match_set(self.JOIN_METHODS) and self._prev, 3799 self._match_set(self.JOIN_SIDES) and self._prev, 3800 self._match_set(self.JOIN_KINDS) and self._prev, 3801 ) 3802 3803 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3804 def 
    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            kwargs["on"] = exp.true()

        return self.expression(exp.Join, comments=comments, **kwargs)
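
    # Editor's illustration (not part of sqlglot/parser.py): USING columns are
    # unwrapped to bare identifiers by _parse_using_identifiers, and a bare comma
    # becomes a join (marked CROSS on dialects with JOINS_HAVE_EQUAL_PRECEDENCE).
    # A minimal sketch assuming sqlglot is installed.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)")
    #     >>> q.args["joins"][0].args["using"]  # [exp.Identifier id]
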
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None
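
    # Editor's illustration (not part of sqlglot/parser.py): MySQL-style index hints
    # parsed by _parse_table_hints end up on the table's "hints" arg. A minimal
    # sketch assuming sqlglot is installed.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT * FROM t USE INDEX (i1)", dialect="mysql")
    #     >>> q.args["from"].this.args["hints"]
    #     # roughly [exp.IndexTableHint(this='USE', expressions=[i1])]
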
    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
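
    # Editor's illustration (not part of sqlglot/parser.py): _parse_table_parts maps
    # a dotted name onto catalog/db/table slots of the exp.Table node. A minimal
    # sketch assuming sqlglot is installed.
    #
    #     >>> import sqlglot
    #     >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t AS x").args["from"].this
    #     >>> tbl.catalog, tbl.db, tbl.name
    #     ('c', 'd', 't')
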
    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )
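
    # Editor's illustration (not part of sqlglot/parser.py): Snowflake time travel
    # parsed by _parse_historical_data is attached to the table under "when".
    # A hedged sketch; the SQL is an arbitrary example.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one(
    #     ...     "SELECT * FROM t AT(TIMESTAMP => '2024-01-01')", dialect="snowflake"
    #     ... )
    #     >>> q.args["from"].this.args["when"]
    #     # an exp.HistoricalData with this='AT' and kind='TIMESTAMP'
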
    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
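
    # Editor's illustration (not part of sqlglot/parser.py): the PERCENT branch of
    # _parse_table_sample fills the "percent" slot of exp.TableSample. A minimal
    # sketch assuming sqlglot is installed.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
    #     >>> q.args["from"].this.args["sample"].args["percent"]  # the literal 10
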
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            if self._prev and self._prev.token_type == TokenType.COMMA:
                return None
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns,
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )
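
    # Editor's illustration (not part of sqlglot/parser.py): the PIVOT machinery
    # above precomputes the wide-format output column names as the product of every
    # FOR ... IN list with the aggregation aliases. A hedged sketch; the exact name
    # ordering depends on PREFIXED_PIVOT_COLUMNS and IDENTIFY_PIVOT_STRINGS.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one(
    #     ...     "SELECT * FROM t PIVOT (SUM(v) AS total FOR y IN (2000, 2010))",
    #     ...     dialect="spark",
    #     ... )
    #     >>> pivot = q.args["from"].this.args["pivots"][0]
    #     >>> [c.name for c in pivot.args["columns"]]
    #     # identifiers combining each IN value with the alias, e.g. '2000_total'
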
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None
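
    # Editor's illustration (not part of sqlglot/parser.py): _parse_group sorts
    # plain keys, ROLLUP, CUBE and GROUPING SETS into separate args of exp.Group.
    # A minimal sketch assuming sqlglot is installed.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
    #     >>> q.args["group"].args["rollup"]  # [exp.Rollup(expressions=[a])]
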
    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
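
    # Editor's illustration (not part of sqlglot/parser.py): when NULLS FIRST/LAST
    # is not written explicitly, _parse_ordered derives nulls_first from the
    # dialect's NULL_ORDERING. A hedged sketch, assuming the default dialect keeps
    # NULL_ORDERING == "nulls_are_small".
    #
    #     >>> import sqlglot
    #     >>> ordered = sqlglot.parse_one("SELECT * FROM t ORDER BY x").args["order"].expressions[0]
    #     >>> ordered.args["nulls_first"]  # True for an ascending "nulls are small" dialect
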
    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks
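
    # Editor's illustration (not part of sqlglot/parser.py): _parse_locks encodes
    # row-locking clauses as exp.Lock nodes on the query's "locks" arg. A minimal
    # sketch assuming sqlglot is installed; "postgres" is an arbitrary choice.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT * FROM jobs FOR UPDATE SKIP LOCKED", dialect="postgres")
    #     >>> lock = q.args["locks"][0]
    #     >>> lock.args["update"], lock.args["wait"]
    #     (True, False)
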
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
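
    # Editor's illustration (not part of sqlglot/parser.py): a bare UNION gets its
    # distinctness from SET_OP_DISTINCT_BY_DEFAULT; the _parse_disjunction ->
    # _parse_conjunction -> _parse_equality -> _parse_comparison chain above is a
    # classic precedence ladder, one level per call. A minimal sketch.
    #
    #     >>> import sqlglot
    #     >>> u = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    #     >>> type(u).__name__, u.args["distinct"]
    #     ('Union', True)
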
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

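    # Illustrative only: `IS [NOT] DISTINCT FROM` parses into the null-safe comparison
    # nodes built above. A sketch, assuming the public API:
    #
    #   sqlglot.parse_one("SELECT a IS NOT DISTINCT FROM b")  # contains exp.NullSafeEQ
    #   sqlglot.parse_one("SELECT x BETWEEN SYMMETRIC 1 AND 2")  # exp.Between(symmetric=True)
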
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() in ("IS", "ROWS")
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

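    # Illustrative only: the canonicalization above means numeric and "packed" string
    # intervals round-trip to the INTERVAL '<val>' <UNIT> form. A sketch (exact output
    # can differ slightly across versions/dialects):
    #
    #   sqlglot.transpile("SELECT INTERVAL 5 day")[0]    # "SELECT INTERVAL '5' DAY"
    #   sqlglot.transpile("SELECT INTERVAL '5 day'")[0]  # "SELECT INTERVAL '5' DAY"
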
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fall back to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)

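    # Illustrative only: BigQuery's typed constructor parses into a Cast, per the
    # canonicalization described above. A sketch, assuming the public API:
    #
    #   sqlglot.parse_one("SELECT STRUCT<a INT64>(1)", read="bigquery")
    #   # -> roughly exp.Cast(this=exp.Struct(...), to=DataType(STRUCT<a INT64>))
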
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions = self._parse_vector_expressions(expressions)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")
                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array, in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_vector_expressions(
        self, expressions: t.List[exp.Expression]
    ) -> t.List[exp.Expression]:
        return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]]

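    # Illustrative only: a sketch of how fixed-size array types and array values are
    # disambiguated by the retreat logic above (DuckDB shown; behavior is dialect-gated):
    #
    #   sqlglot.parse_one("SELECT CAST(x AS INT[3])", read="duckdb")  # fixed-size array type
    #   sqlglot.parse_one("SELECT ARRAY[1, 2]", read="duckdb")        # exp.Array value
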
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

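    # Illustrative only:
    #
    #   sqlglot.parse_one("SELECT ts AT TIME ZONE 'UTC'")
    #   # -> contains exp.AtTimeZone(this=ts, zone='UTC' literal)
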
col:"a'b") as 5653 # it'll roundtrip to a string literal in GET_PATH 5654 if isinstance(path, exp.Identifier) and path.quoted: 5655 escape = True 5656 5657 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5658 5659 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5660 # Databricks transforms it back to the colon/dot notation 5661 if json_path: 5662 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5663 5664 if json_path_expr: 5665 json_path_expr.set("escape", escape) 5666 5667 this = self.expression( 5668 exp.JSONExtract, 5669 this=this, 5670 expression=json_path_expr, 5671 variant_extract=True, 5672 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5673 ) 5674 5675 while casts: 5676 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5677 5678 return this 5679 5680 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5681 return self._parse_types() 5682 5683 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5684 this = self._parse_bracket(this) 5685 5686 while self._match_set(self.COLUMN_OPERATORS): 5687 op_token = self._prev.token_type 5688 op = self.COLUMN_OPERATORS.get(op_token) 5689 5690 if op_token in self.CAST_COLUMN_OPERATORS: 5691 field = self._parse_dcolon() 5692 if not field: 5693 self.raise_error("Expected type") 5694 elif op and self._curr: 5695 field = self._parse_column_reference() or self._parse_bracket() 5696 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5697 field = self._parse_column_ops(field) 5698 else: 5699 field = self._parse_field(any_token=True, anonymous_func=True) 5700 5701 # Function calls can be qualified, e.g., x.y.FOO() 5702 # This converts the final AST to a series of Dots leading to the function call 5703 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5704 if isinstance(field, (exp.Func, exp.Window)) and this: 5705 this = this.transform( 5706 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5707 ) 5708 5709 if op: 5710 this = op(self, this, field) 5711 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5712 this = self.expression( 5713 exp.Column, 5714 comments=this.comments, 5715 this=field, 5716 table=this.this, 5717 db=this.args.get("table"), 5718 catalog=this.args.get("db"), 5719 ) 5720 elif isinstance(field, exp.Window): 5721 # Move the exp.Dot's to the window's function 5722 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5723 field.set("this", window_func) 5724 this = field 5725 else: 5726 this = self.expression(exp.Dot, this=this, expression=field) 5727 5728 if field and field.comments: 5729 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5730 5731 this = self._parse_bracket(this) 5732 5733 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5734 5735 def _parse_paren(self) -> t.Optional[exp.Expression]: 5736 if not self._match(TokenType.L_PAREN): 5737 return None 5738 5739 comments = self._prev_comments 5740 query = self._parse_select() 5741 5742 if query: 5743 expressions = [query] 5744 else: 5745 expressions = self._parse_expressions() 5746 5747 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5748 5749 if not this and self._match(TokenType.R_PAREN, advance=False): 5750 this = self.expression(exp.Tuple) 5751 elif isinstance(this, 
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

                # Function calls can be qualified, e.g., x.y.FOO()
                # This converts the final AST to a series of Dots leading to the function call
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                if isinstance(field, (exp.Func, exp.Window)) and this:
                    this = this.transform(
                        lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                    )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

        this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

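    # Illustrative only: adjacent string literals concatenate, as handled in
    # _parse_primary above. A sketch:
    #
    #   sqlglot.parse_one("SELECT 'foo' 'bar'")
    #   # -> roughly exp.Concat(expressions=['foo', 'bar'])
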
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]:
        return self._parse_csv(lambda: self._parse_lambda(alias=alias))

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_function_args(alias)

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

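    # Illustrative only: the ODBC escape syntax and the anonymous-comment escape hatch
    # handled above. A sketch, assuming the public API:
    #
    #   sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}")           # parsed like CONCAT('a', 'b')
    #   sqlglot.parse_one("SELECT SUM(1) /* sqlglot.anonymous */")  # forced into exp.Anonymous
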
    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

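    # Illustrative only: higher-order function arguments parse into exp.Lambda via
    # the LAMBDAS table used above. A sketch:
    #
    #   sqlglot.parse_one("SELECT transform(xs, x -> x + 1)", read="spark")
    #   # -> contains roughly exp.Lambda(this=x + 1, expressions=[x])
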
    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                data_type=exp.Var(this="AUTO")
                if self._match_text_seq("AUTO")
                else self._parse_types(),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

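    # Illustrative only: computed columns land in exp.ComputedColumnConstraint via the
    # first branch above. A sketch (T-SQL shown):
    #
    #   sqlglot.parse_one("CREATE TABLE t (c AS (a + b) PERSISTED)", read="tsql")
    #   # -> contains exp.ComputedColumnConstraint(this=a + b, persisted=True)
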
    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

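    # Illustrative only:
    #
    #   sqlglot.parse_one(
    #       "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))",
    #       read="postgres",
    #   )  # -> contains exp.GeneratedAsIdentityColumnConstraint(this=False, start=1, increment=2)
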
    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

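    # Illustrative only: referential actions are collected into on_options above. A sketch:
    #
    #   sqlglot.parse_one(
    #       "CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES p (id) ON DELETE CASCADE)"
    #   )  # -> contains roughly exp.ForeignKey(expressions=[a], delete='CASCADE', ...)
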
    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

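    # Illustrative only:
    #
    #   sqlglot.parse_one("SELECT {d '2024-01-15'}")
    #   # the {d '...'} literal parses into the expression type registered in
    #   # ODBC_DATETIME_LITERALS, i.e. a date node handled like DATE('2024-01-15')
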
= exp.column("interval") 6522 else: 6523 self.raise_error("Expected END after CASE", self._prev) 6524 6525 return self.expression( 6526 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6527 ) 6528 6529 def _parse_if(self) -> t.Optional[exp.Expression]: 6530 if self._match(TokenType.L_PAREN): 6531 args = self._parse_csv( 6532 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6533 ) 6534 this = self.validate_expression(exp.If.from_arg_list(args), args) 6535 self._match_r_paren() 6536 else: 6537 index = self._index - 1 6538 6539 if self.NO_PAREN_IF_COMMANDS and index == 0: 6540 return self._parse_as_command(self._prev) 6541 6542 condition = self._parse_assignment() 6543 6544 if not condition: 6545 self._retreat(index) 6546 return None 6547 6548 self._match(TokenType.THEN) 6549 true = self._parse_assignment() 6550 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6551 self._match(TokenType.END) 6552 this = self.expression(exp.If, this=condition, true=true, false=false) 6553 6554 return this 6555 6556 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6557 if not self._match_text_seq("VALUE", "FOR"): 6558 self._retreat(self._index - 1) 6559 return None 6560 6561 return self.expression( 6562 exp.NextValueFor, 6563 this=self._parse_column(), 6564 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6565 ) 6566 6567 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6568 this = self._parse_function() or self._parse_var_or_string(upper=True) 6569 6570 if self._match(TokenType.FROM): 6571 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6572 6573 if not self._match(TokenType.COMMA): 6574 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6575 6576 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6577 6578 def _parse_gap_fill(self) -> exp.GapFill: 6579 self._match(TokenType.TABLE) 6580 this = self._parse_table() 6581 6582 self._match(TokenType.COMMA) 6583 args = [this, *self._parse_csv(self._parse_lambda)] 6584 6585 gap_fill = exp.GapFill.from_arg_list(args) 6586 return self.validate_expression(gap_fill, args) 6587 6588 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6589 this = self._parse_assignment() 6590 6591 if not self._match(TokenType.ALIAS): 6592 if self._match(TokenType.COMMA): 6593 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6594 6595 self.raise_error("Expected AS after CAST") 6596 6597 fmt = None 6598 to = self._parse_types() 6599 6600 default = self._match(TokenType.DEFAULT) 6601 if default: 6602 default = self._parse_bitwise() 6603 self._match_text_seq("ON", "CONVERSION", "ERROR") 6604 6605 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6606 fmt_string = self._parse_string() 6607 fmt = self._parse_at_time_zone(fmt_string) 6608 6609 if not to: 6610 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6611 if to.this in exp.DataType.TEMPORAL_TYPES: 6612 this = self.expression( 6613 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6614 this=this, 6615 format=exp.Literal.string( 6616 format_time( 6617 fmt_string.this if fmt_string else "", 6618 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6619 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6620 ) 6621 ), 6622 safe=safe, 6623 ) 6624 6625 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6626 this.set("zone", 
fmt.args["zone"]) 6627 return this 6628 elif not to: 6629 self.raise_error("Expected TYPE after CAST") 6630 elif isinstance(to, exp.Identifier): 6631 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6632 elif to.this == exp.DataType.Type.CHAR: 6633 if self._match(TokenType.CHARACTER_SET): 6634 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6635 6636 return self.build_cast( 6637 strict=strict, 6638 this=this, 6639 to=to, 6640 format=fmt, 6641 safe=safe, 6642 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6643 default=default, 6644 ) 6645 6646 def _parse_string_agg(self) -> exp.GroupConcat: 6647 if self._match(TokenType.DISTINCT): 6648 args: t.List[t.Optional[exp.Expression]] = [ 6649 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6650 ] 6651 if self._match(TokenType.COMMA): 6652 args.extend(self._parse_csv(self._parse_assignment)) 6653 else: 6654 args = self._parse_csv(self._parse_assignment) # type: ignore 6655 6656 if self._match_text_seq("ON", "OVERFLOW"): 6657 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6658 if self._match_text_seq("ERROR"): 6659 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6660 else: 6661 self._match_text_seq("TRUNCATE") 6662 on_overflow = self.expression( 6663 exp.OverflowTruncateBehavior, 6664 this=self._parse_string(), 6665 with_count=( 6666 self._match_text_seq("WITH", "COUNT") 6667 or not self._match_text_seq("WITHOUT", "COUNT") 6668 ), 6669 ) 6670 else: 6671 on_overflow = None 6672 6673 index = self._index 6674 if not self._match(TokenType.R_PAREN) and args: 6675 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6676 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6677 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6678 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6679 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6680 6681 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6682 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6683 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
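
    # Usage sketch for _parse_decode above: with fewer than three arguments
    # DECODE is treated as a charset decode, while three or more arguments
    # produce a CASE-like DecodeCase node (Oracle-style input is assumed):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("DECODE(a, 1, 'one', 'other')", read="oracle")
    #     assert isinstance(node, exp.DecodeCase)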

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )
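
    # Usage sketch for _parse_json_object above: key/value pairs and the
    # optional NULL handling clause are captured on the JSONObject node:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("JSON_OBJECT('k' VALUE v NULL ON NULL)")
    #     assert isinstance(node, exp.JSONObject)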

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        if self._match_text_seq("TABLE"):
            # parse SingleStore MATCH(TABLE ...) syntax
            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
            expressions = []
            table = self._parse_table()
            if table:
                expressions = [table]
        else:
            expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)
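
    # Usage sketch for _parse_position above: both `POSITION(needle IN haystack)`
    # and the positional two-argument form normalize into StrPosition, with the
    # haystack stored as `this` and the needle as `substr`:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("POSITION('@' IN email)")
    #     assert isinstance(node, exp.StrPosition)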

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # (despite being undocumented, the reverse order also works)
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        start, length = None, None

        while self._curr:
            if self._match(TokenType.FROM):
                start = self._parse_bitwise()
            elif self._match(TokenType.FOR):
                if not start:
                    start = exp.Literal.number(1)
                length = self._parse_bitwise()
            else:
                break

        if start:
            args.append(start)
        if length:
            args.append(length)

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
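
    # Usage sketch for _parse_trim above: in `TRIM(BOTH 'x' FROM col)` the trim
    # pattern is parsed first and then swapped, so `this` ends up holding the
    # column and `expression` the pattern:
    #
    #     import sqlglot
    #
    #     node = sqlglot.parse_one("TRIM(BOTH 'x' FROM col)")
    #     assert node.this.name == "col" and node.args["position"] == "BOTH"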

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()

            end = self._parse_window_spec() if self._match(TokenType.AND) else {}
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
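
    # Usage sketch for _parse_window above: a frame clause produces a nested
    # WindowSpec under the Window node:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     sql = "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #     window = sqlglot.parse_one(sql).find(exp.Window)
    #     assert isinstance(window.args["spec"], exp.WindowSpec)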

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_type()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.NULL, TokenType.UNKNOWN)):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_alias(self._parse_assignment(), explicit=True)
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
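
    # Note on the combinators here: _parse_csv above repeatedly applies a parse
    # method around a separator token, and _parse_wrapped/_parse_wrapped_csv
    # below add the surrounding parentheses, so a call such as
    # `self._parse_wrapped_csv(self._parse_id_var)` consumes a list of the
    # form `(a, b, c)`.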

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set
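
    # Usage sketch for _parse_alter_table_alter above (Postgres-style input is
    # assumed); the parsed action surfaces as an AlterColumn node on the Alter
    # statement:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c SET DATA TYPE INT", read="postgres")
    #     assert isinstance(node.args["actions"][0], exp.AlterColumn)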

    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession, expressions=expressions, unset=False)

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True))
        )
        return self.expression(exp.AlterSession, expressions=expressions, unset=True)

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        if alter_token.token_type == TokenType.SESSION:
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expression keywords before falling back to parsing a table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None
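
    # Usage sketch for the ANALYZE parsers above: a bare statement falls back
    # to an empty Analyze node, while recognized suffixes populate `kind`,
    # `this` and the inner expression (generic dialect is assumed here):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("ANALYZE TABLE db.tbl")
    #     assert isinstance(node, exp.Analyze) and node.args["kind"] == "TABLE"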

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])
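
    # Note on _parse_as_command above: it is the catch-all escape hatch. The
    # remaining tokens are swallowed verbatim, an "unsupported syntax" warning
    # is emitted, and the statement round-trips as an opaque exp.Command whose
    # `this` is the leading keyword and whose `expression` is the raw SQL tail.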

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
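
    # Note on the _match* primitives above: they are all-or-nothing. In
    # particular, _match_text_seq records the starting index and retreats to it
    # if any keyword in the sequence fails to match, so callers can probe long
    # keyword runs (e.g. `self._match_text_seq("WITH", "GRANT", "OPTION")`)
    # without corrupting the token position.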

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
8410 ) 8411 8412 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8413 8414 files = self._parse_csv(self._parse_file_location) 8415 if self._match(TokenType.EQ, advance=False): 8416 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8417 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8418 # list via `_parse_wrapped(..)` below. 8419 self._advance(-1) 8420 files = [] 8421 8422 credentials = self._parse_credentials() 8423 8424 self._match_text_seq("WITH") 8425 8426 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8427 8428 # Fallback case 8429 if self._curr: 8430 return self._parse_as_command(start) 8431 8432 return self.expression( 8433 exp.Copy, 8434 this=this, 8435 kind=kind, 8436 credentials=credentials, 8437 files=files, 8438 params=params, 8439 ) 8440 8441 def _parse_normalize(self) -> exp.Normalize: 8442 return self.expression( 8443 exp.Normalize, 8444 this=self._parse_bitwise(), 8445 form=self._match(TokenType.COMMA) and self._parse_var(), 8446 ) 8447 8448 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8449 args = self._parse_csv(lambda: self._parse_lambda()) 8450 8451 this = seq_get(args, 0) 8452 decimals = seq_get(args, 1) 8453 8454 return expr_type( 8455 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8456 ) 8457 8458 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8459 star_token = self._prev 8460 8461 if self._match_text_seq("COLUMNS", "(", advance=False): 8462 this = self._parse_function() 8463 if isinstance(this, exp.Columns): 8464 this.set("unpack", True) 8465 return this 8466 8467 return self.expression( 8468 exp.Star, 8469 **{ # type: ignore 8470 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8471 "replace": self._parse_star_op("REPLACE"), 8472 "rename": self._parse_star_op("RENAME"), 8473 }, 8474 ).update_positions(star_token) 8475 8476 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8477 privilege_parts = [] 8478 8479 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8480 # (end of privilege list) or L_PAREN (start of column list) are met 8481 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8482 privilege_parts.append(self._curr.text.upper()) 8483 self._advance() 8484 8485 this = exp.var(" ".join(privilege_parts)) 8486 expressions = ( 8487 self._parse_wrapped_csv(self._parse_column) 8488 if self._match(TokenType.L_PAREN, advance=False) 8489 else None 8490 ) 8491 8492 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8493 8494 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8495 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8496 principal = self._parse_id_var() 8497 8498 if not principal: 8499 return None 8500 8501 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8502 8503 def _parse_grant_revoke_common( 8504 self, 8505 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8506 privileges = self._parse_csv(self._parse_grant_privilege) 8507 8508 self._match(TokenType.ON) 8509 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8510 8511 # Attempt to parse the securable e.g. 
MySQL allows names 8512 # such as "foo.*", "*.*" which are not easily parseable yet 8513 securable = self._try_parse(self._parse_table_parts) 8514 8515 return privileges, kind, securable 8516 8517 def _parse_grant(self) -> exp.Grant | exp.Command: 8518 start = self._prev 8519 8520 privileges, kind, securable = self._parse_grant_revoke_common() 8521 8522 if not securable or not self._match_text_seq("TO"): 8523 return self._parse_as_command(start) 8524 8525 principals = self._parse_csv(self._parse_grant_principal) 8526 8527 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8528 8529 if self._curr: 8530 return self._parse_as_command(start) 8531 8532 return self.expression( 8533 exp.Grant, 8534 privileges=privileges, 8535 kind=kind, 8536 securable=securable, 8537 principals=principals, 8538 grant_option=grant_option, 8539 ) 8540 8541 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8542 start = self._prev 8543 8544 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8545 8546 privileges, kind, securable = self._parse_grant_revoke_common() 8547 8548 if not securable or not self._match_text_seq("FROM"): 8549 return self._parse_as_command(start) 8550 8551 principals = self._parse_csv(self._parse_grant_principal) 8552 8553 cascade = None 8554 if self._match_texts(("CASCADE", "RESTRICT")): 8555 cascade = self._prev.text.upper() 8556 8557 if self._curr: 8558 return self._parse_as_command(start) 8559 8560 return self.expression( 8561 exp.Revoke, 8562 privileges=privileges, 8563 kind=kind, 8564 securable=securable, 8565 principals=principals, 8566 grant_option=grant_option, 8567 cascade=cascade, 8568 ) 8569 8570 def _parse_overlay(self) -> exp.Overlay: 8571 return self.expression( 8572 exp.Overlay, 8573 **{ # type: ignore 8574 "this": self._parse_bitwise(), 8575 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8576 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8577 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8578 }, 8579 ) 8580 8581 def _parse_format_name(self) -> exp.Property: 8582 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8583 # for FILE_FORMAT = <format_name> 8584 return self.expression( 8585 exp.Property, 8586 this=exp.var("FORMAT_NAME"), 8587 value=self._parse_string() or self._parse_table_parts(), 8588 ) 8589 8590 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8591 args: t.List[exp.Expression] = [] 8592 8593 if self._match(TokenType.DISTINCT): 8594 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8595 self._match(TokenType.COMMA) 8596 8597 args.extend(self._parse_csv(self._parse_assignment)) 8598 8599 return self.expression( 8600 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8601 ) 8602 8603 def _identifier_expression( 8604 self, token: t.Optional[Token] = None, **kwargs: t.Any 8605 ) -> exp.Identifier: 8606 token = token or self._prev 8607 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8608 expression.update_positions(token) 8609 return expression 8610 8611 def _build_pipe_cte( 8612 self, 8613 query: exp.Query, 8614 expressions: t.List[exp.Expression], 8615 alias_cte: t.Optional[exp.TableAlias] = None, 8616 ) -> exp.Select: 8617 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8618 if alias_cte: 8619 new_cte = alias_cte 8620 else: 8621 self._pipe_cte_counter += 1 8622 new_cte = f"__tmp{self._pipe_cte_counter}" 8623 8624 with_ = 
query.args.get("with") 8625 ctes = with_.pop() if with_ else None 8626 8627 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8628 if ctes: 8629 new_select.set("with", ctes) 8630 8631 return new_select.with_(new_cte, as_=query, copy=False) 8632 8633 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8634 select = self._parse_select(consume_pipe=False) 8635 if not select: 8636 return query 8637 8638 return self._build_pipe_cte( 8639 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8640 ) 8641 8642 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8643 limit = self._parse_limit() 8644 offset = self._parse_offset() 8645 if limit: 8646 curr_limit = query.args.get("limit", limit) 8647 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8648 query.limit(limit, copy=False) 8649 if offset: 8650 curr_offset = query.args.get("offset") 8651 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8652 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8653 8654 return query 8655 8656 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8657 this = self._parse_assignment() 8658 if self._match_text_seq("GROUP", "AND", advance=False): 8659 return this 8660 8661 this = self._parse_alias(this) 8662 8663 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8664 return self._parse_ordered(lambda: this) 8665 8666 return this 8667 8668 def _parse_pipe_syntax_aggregate_group_order_by( 8669 self, query: exp.Select, group_by_exists: bool = True 8670 ) -> exp.Select: 8671 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8672 aggregates_or_groups, orders = [], [] 8673 for element in expr: 8674 if isinstance(element, exp.Ordered): 8675 this = element.this 8676 if isinstance(this, exp.Alias): 8677 element.set("this", this.args["alias"]) 8678 orders.append(element) 8679 else: 8680 this = element 8681 aggregates_or_groups.append(this) 8682 8683 if group_by_exists: 8684 query.select(*aggregates_or_groups, copy=False).group_by( 8685 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8686 copy=False, 8687 ) 8688 else: 8689 query.select(*aggregates_or_groups, append=False, copy=False) 8690 8691 if orders: 8692 return query.order_by(*orders, append=False, copy=False) 8693 8694 return query 8695 8696 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8697 self._match_text_seq("AGGREGATE") 8698 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8699 8700 if self._match(TokenType.GROUP_BY) or ( 8701 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8702 ): 8703 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8704 8705 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8706 8707 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8708 first_setop = self.parse_set_operation(this=query) 8709 if not first_setop: 8710 return None 8711 8712 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8713 expr = self._parse_paren() 8714 return expr.assert_is(exp.Subquery).unnest() if expr else None 8715 8716 first_setop.this.pop() 8717 8718 setops = [ 8719 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8720 *self._parse_csv(_parse_and_unwrap_query), 8721 ] 8722 8723 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8724 
with_ = query.args.get("with") 8725 ctes = with_.pop() if with_ else None 8726 8727 if isinstance(first_setop, exp.Union): 8728 query = query.union(*setops, copy=False, **first_setop.args) 8729 elif isinstance(first_setop, exp.Except): 8730 query = query.except_(*setops, copy=False, **first_setop.args) 8731 else: 8732 query = query.intersect(*setops, copy=False, **first_setop.args) 8733 8734 query.set("with", ctes) 8735 8736 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8737 8738 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8739 join = self._parse_join() 8740 if not join: 8741 return None 8742 8743 if isinstance(query, exp.Select): 8744 return query.join(join, copy=False) 8745 8746 return query 8747 8748 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8749 pivots = self._parse_pivots() 8750 if not pivots: 8751 return query 8752 8753 from_ = query.args.get("from") 8754 if from_: 8755 from_.this.set("pivots", pivots) 8756 8757 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8758 8759 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8760 self._match_text_seq("EXTEND") 8761 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8762 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8763 8764 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8765 sample = self._parse_table_sample() 8766 8767 with_ = query.args.get("with") 8768 if with_: 8769 with_.expressions[-1].this.set("sample", sample) 8770 else: 8771 query.set("sample", sample) 8772 8773 return query 8774 8775 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8776 if isinstance(query, exp.Subquery): 8777 query = exp.select("*").from_(query, copy=False) 8778 8779 if not query.args.get("from"): 8780 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8781 8782 while self._match(TokenType.PIPE_GT): 8783 start = self._curr 8784 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8785 if not parser: 8786 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8787 # keywords, making it tricky to disambiguate them without lookahead. The approach 8788 # here is to try and parse a set operation and if that fails, then try to parse a 8789 # join operator. If that fails as well, then the operator is not supported. 
8790 parsed_query = self._parse_pipe_syntax_set_operator(query) 8791 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8792 if not parsed_query: 8793 self._retreat(start) 8794 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8795 break 8796 query = parsed_query 8797 else: 8798 query = parser(self, query) 8799 8800 return query 8801 8802 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8803 vars = self._parse_csv(self._parse_id_var) 8804 if not vars: 8805 return None 8806 8807 return self.expression( 8808 exp.DeclareItem, 8809 this=vars, 8810 kind=self._parse_types(), 8811 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8812 ) 8813 8814 def _parse_declare(self) -> exp.Declare | exp.Command: 8815 start = self._prev 8816 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8817 8818 if not expressions or self._curr: 8819 return self._parse_as_command(start) 8820 8821 return self.expression(exp.Declare, expressions=expressions) 8822 8823 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8824 exp_class = exp.Cast if strict else exp.TryCast 8825 8826 if exp_class == exp.TryCast: 8827 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8828 8829 return self.expression(exp_class, **kwargs) 8830 8831 def _parse_json_value(self) -> exp.JSONValue: 8832 this = self._parse_bitwise() 8833 self._match(TokenType.COMMA) 8834 path = self._parse_bitwise() 8835 8836 returning = self._match(TokenType.RETURNING) and self._parse_type() 8837 8838 return self.expression( 8839 exp.JSONValue, 8840 this=this, 8841 path=self.dialect.to_json_path(path), 8842 returning=returning, 8843 on_condition=self._parse_on_condition(), 8844 ) 8845 8846 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8847 def concat_exprs( 8848 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8849 ) -> exp.Expression: 8850 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8851 concat_exprs = [ 8852 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8853 ] 8854 node.set("expressions", concat_exprs) 8855 return node 8856 if len(exprs) == 1: 8857 return exprs[0] 8858 return self.expression(exp.Concat, expressions=args, safe=True) 8859 8860 args = self._parse_csv(self._parse_lambda) 8861 8862 if args: 8863 order = args[-1] if isinstance(args[-1], exp.Order) else None 8864 8865 if order: 8866 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8867 # remove 'expr' from exp.Order and add it back to args 8868 args[-1] = order.this 8869 order.set("this", concat_exprs(order.this, args)) 8870 8871 this = order or concat_exprs(args[0], args) 8872 else: 8873 this = None 8874 8875 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8876 8877 return self.expression(exp.GroupConcat, this=this, separator=separator)
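These private helpers are exercised through sqlglot's public entry points rather than called directly. A minimal sketch of how two of the constructs parsed above surface (t1, t2, t, name and id are placeholder identifiers):

import sqlglot

# TRUNCATE TABLE with Postgres-style options flows through _parse_truncate_table
tree = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
print(type(tree).__name__)  # TruncateTable, with the identity/option args populated

# GROUP_CONCAT with ORDER BY and SEPARATOR flows through _parse_group_concat
print(sqlglot.transpile(
    "SELECT GROUP_CONCAT(name ORDER BY id SEPARATOR '|') FROM t",
    read="mysql",
)[0])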
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
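For illustration, a minimal sketch of wiring a Parser up by hand with a non-default error level (the top-level sqlglot.parse helper normally does this for you; the base Tokenizer is assumed):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT foo FROM (SELECT baz FROM t"  # missing closing paren
parser = Parser(error_level=ErrorLevel.WARN, max_errors=5)
parser.parse(Tokenizer().tokenize(sql), sql=sql)
print(parser.errors)  # with WARN the errors are logged and recorded, not raised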
1585 def __init__( 1586 self, 1587 error_level: t.Optional[ErrorLevel] = None, 1588 error_message_context: int = 100, 1589 max_errors: int = 3, 1590 dialect: DialectType = None, 1591 ): 1592 from sqlglot.dialects import Dialect 1593 1594 self.error_level = error_level or ErrorLevel.IMMEDIATE 1595 self.error_message_context = error_message_context 1596 self.max_errors = max_errors 1597 self.dialect = Dialect.get_or_raise(dialect) 1598 self.reset()
1611 def parse( 1612 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1613 ) -> t.List[t.Optional[exp.Expression]]: 1614 """ 1615 Parses a list of tokens and returns a list of syntax trees, one tree 1616 per parsed SQL statement. 1617 1618 Args: 1619 raw_tokens: The list of tokens. 1620 sql: The original SQL string, used to produce helpful debug messages. 1621 1622 Returns: 1623 The list of the produced syntax trees. 1624 """ 1625 return self._parse( 1626 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1627 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
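A short usage sketch, mirroring what the top-level sqlglot.parse helper does internally:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
print(len(trees))  # 2 -- one syntax tree per statement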
1629 def parse_into( 1630 self, 1631 expression_types: exp.IntoType, 1632 raw_tokens: t.List[Token], 1633 sql: t.Optional[str] = None, 1634 ) -> t.List[t.Optional[exp.Expression]]: 1635 """ 1636 Parses a list of tokens into a given Expression type. If a collection of Expression 1637 types is given instead, this method will try to parse the token list into each one 1638 of them, stopping at the first for which the parsing succeeds. 1639 1640 Args: 1641 expression_types: The expression type(s) to try and parse the token list into. 1642 raw_tokens: The list of tokens. 1643 sql: The original SQL string, used to produce helpful debug messages. 1644 1645 Returns: 1646 The target Expression. 1647 """ 1648 errors = [] 1649 for expression_type in ensure_list(expression_types): 1650 parser = self.EXPRESSION_PARSERS.get(expression_type) 1651 if not parser: 1652 raise TypeError(f"No parser registered for {expression_type}") 1653 1654 try: 1655 return self._parse(parser, raw_tokens, sql) 1656 except ParseError as e: 1657 e.errors[0]["into_expression"] = expression_type 1658 errors.append(e) 1659 1660 raise ParseError( 1661 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1662 errors=merge_errors(errors), 1663 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
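In application code this path is usually reached through parse_one's into argument rather than by calling parse_into directly; a brief sketch:

import sqlglot
from sqlglot import exp

select = sqlglot.parse_one("SELECT * FROM t", into=exp.Select)
print(type(select).__name__)  # Select

Passing a type with no registered parser raises TypeError ("No parser registered for ..."), while parse failures for every candidate type are merged into a single ParseError.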
1703 def check_errors(self) -> None: 1704 """Logs or raises any found errors, depending on the chosen error level setting.""" 1705 if self.error_level == ErrorLevel.WARN: 1706 for error in self.errors: 1707 logger.error(str(error)) 1708 elif self.error_level == ErrorLevel.RAISE and self.errors: 1709 raise ParseError( 1710 concat_messages(self.errors, self.max_errors), 1711 errors=merge_errors(self.errors), 1712 )
Logs or raises any found errors, depending on the chosen error level setting.
1714 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1715 """ 1716 Appends an error in the list of recorded errors or raises it, depending on the chosen 1717 error level setting. 1718 """ 1719 token = token or self._curr or self._prev or Token.string("") 1720 start = token.start 1721 end = token.end + 1 1722 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1723 highlight = self.sql[start:end] 1724 end_context = self.sql[end : end + self.error_message_context] 1725 1726 error = ParseError.new( 1727 f"{message}. Line {token.line}, Col: {token.col}.\n" 1728 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1729 description=message, 1730 line=token.line, 1731 col=token.col, 1732 start_context=start_context, 1733 highlight=highlight, 1734 end_context=end_context, 1735 ) 1736 1737 if self.error_level == ErrorLevel.IMMEDIATE: 1738 raise error 1739 1740 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
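The structured fields recorded here (description, line, col, start_context, highlight, end_context) are accessible on the raised ParseError, which is useful for building custom error reports:

import sqlglot
from sqlglot.errors import ParseError

try:
    sqlglot.transpile("SELECT foo FROM (SELECT baz FROM t")
except ParseError as e:
    first = e.errors[0]
    print(first["description"], first["line"], first["col"], first["highlight"])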
1742 def expression( 1743 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1744 ) -> E: 1745 """ 1746 Creates a new, validated Expression. 1747 1748 Args: 1749 exp_class: The expression class to instantiate. 1750 comments: An optional list of comments to attach to the expression. 1751 kwargs: The arguments to set for the expression along with their respective values. 1752 1753 Returns: 1754 The target expression. 1755 """ 1756 instance = exp_class(**kwargs) 1757 instance.add_comments(comments) if comments else self._add_comments(instance) 1758 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
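A sketch of the builder in isolation; dialect parse methods use this same path. exp.column and exp.DataType.build are helpers from sqlglot.expressions:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
cast = parser.expression(exp.Cast, this=exp.column("x"), to=exp.DataType.build("INT"))
print(cast.sql())  # CAST(x AS INT)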
1765 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1766 """ 1767 Validates an Expression, making sure that all its mandatory arguments are set. 1768 1769 Args: 1770 expression: The expression to validate. 1771 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1772 1773 Returns: 1774 The validated expression. 1775 """ 1776 if self.error_level != ErrorLevel.IGNORE: 1777 for error_message in expression.error_messages(args): 1778 self.raise_error(error_message) 1779 1780 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
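Because expression() funnels new nodes through validate_expression, a node missing a mandatory argument fails at construction time under the default ErrorLevel.IMMEDIATE. A sketch, assuming exp.Cast (whose to argument is mandatory):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    Parser().expression(exp.Cast, this=exp.column("x"))  # 'to' is not supplied
except ParseError as e:
    print(e)  # reports the missing mandatory argument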
4863 def parse_set_operation( 4864 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4865 ) -> t.Optional[exp.Expression]: 4866 start = self._index 4867 _, side_token, kind_token = self._parse_join_parts() 4868 4869 side = side_token.text if side_token else None 4870 kind = kind_token.text if kind_token else None 4871 4872 if not self._match_set(self.SET_OPERATIONS): 4873 self._retreat(start) 4874 return None 4875 4876 token_type = self._prev.token_type 4877 4878 if token_type == TokenType.UNION: 4879 operation: t.Type[exp.SetOperation] = exp.Union 4880 elif token_type == TokenType.EXCEPT: 4881 operation = exp.Except 4882 else: 4883 operation = exp.Intersect 4884 4885 comments = self._prev.comments 4886 4887 if self._match(TokenType.DISTINCT): 4888 distinct: t.Optional[bool] = True 4889 elif self._match(TokenType.ALL): 4890 distinct = False 4891 else: 4892 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4893 if distinct is None: 4894 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4895 4896 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4897 "STRICT", "CORRESPONDING" 4898 ) 4899 if self._match_text_seq("CORRESPONDING"): 4900 by_name = True 4901 if not side and not kind: 4902 kind = "INNER" 4903 4904 on_column_list = None 4905 if by_name and self._match_texts(("ON", "BY")): 4906 on_column_list = self._parse_wrapped_csv(self._parse_column) 4907 4908 expression = self._parse_select( 4909 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4910 ) 4911 4912 return self.expression( 4913 operation, 4914 comments=comments, 4915 this=this, 4916 distinct=distinct, 4917 by_name=by_name, 4918 expression=expression, 4919 side=side, 4920 kind=kind, 4921 on=on_column_list, 4922 )
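The distinct/by_name handling above can be observed on parsed trees; a brief sketch:

import sqlglot

# A bare UNION falls back to the dialect default (DISTINCT for standard SQL)
union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
print(union.args.get("distinct"))  # True

# BY NAME (e.g. DuckDB's UNION BY NAME) sets the by_name flag
by_name = sqlglot.parse_one("SELECT 1 AS x UNION BY NAME SELECT 2 AS x", read="duckdb")
print(by_name.args.get("by_name"))  # True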