sqlglot.parser
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

# Names below are only needed by type checkers; they are referenced from annotations,
# which are lazy strings thanks to `from __future__ import annotations`.
if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")

# NOTE(review): the builders below read positional arguments through `seq_get`;
# presumably it yields None for a missing position - confirm in sqlglot.helper.


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat, alternating key/value argument list.

    Args:
        args: Parsed arguments. A single argument whose `is_star` is truthy produces
            an `exp.StarMap`; otherwise items at even positions are collected as keys
            and the item following each key as its value.

    Returns:
        An `exp.StarMap` wrapping the star argument, or an `exp.VarMap` whose `keys`
        and `values` are arrays built with `exp.array(..., copy=False)`.

    Raises:
        IndexError: If `args` has an odd length, because `args[i + 1]` is read for
            every key position.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build an `exp.Like` from function-call style arguments.

    The second argument becomes `this` and the first becomes `expression`. When more
    than two arguments are given, the result is wrapped in an `exp.Escape` whose
    `expression` is the third argument.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Create a parser callback for a binary range operator.

    Args:
        expr_type: The expression class to instantiate.
        reverse_args: If True, the already parsed operand and the newly parsed one
            swap places before the expression is built.

    Returns:
        A function taking `(parser, this)` that parses the other operand with
        `_parse_bitwise`, builds `expr_type` and hands it to `_parse_escape`.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm expression from one or two arguments.

    With a truthy second argument an `exp.Log` is returned; the two arguments are
    swapped first when `dialect.LOG_BASE_FIRST` is falsy. Otherwise the single
    argument is wrapped in `exp.Ln` if `dialect.parser_class.LOG_DEFAULTS_TO_LN` is
    truthy, else in `exp.Log`.
    """
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Wrap the first argument in `exp.LowerHex` if `dialect.HEX_LOWERCASE`, else `exp.Hex`."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    """Build `exp.Lower`, collapsing a directly nested `exp.Hex` argument into `exp.LowerHex`."""
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    """Build `exp.Upper`, collapsing a directly nested `exp.Hex` argument into `exp.Hex`."""
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Create a builder for JSON extraction expressions that take a path argument.

    Args:
        expr_type: The expression class the returned builder instantiates.

    Returns:
        A `(args, dialect)` builder. It passes the first argument as `this` and the
        second one, converted through `dialect.to_json_path`, as `expression`.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Extra arguments are only kept for exp.JSONExtract itself
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    """Build an `exp.Mod` from two arguments, parenthesizing operands that are binary nodes."""
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True) -> exp.Pad:
    """Build an `exp.Pad` from up to three arguments.

    Args:
        args: Passed positionally as `this`, `expression` and `fill_pattern`.
        is_left: Forwarded unchanged as the node's `is_left` argument.
    """
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Instantiate `exp_class` with `args` as its `expressions`.

    For `exp.Array` in dialects where `HAS_DISTINCT_ARRAY_CONSTRUCTORS` is truthy, the
    node's `bracket_notation` argument records whether `bracket_kind` is
    `TokenType.L_BRACKET`.
    """
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build an `exp.ConvertTimezone` from a two-argument or general argument list.

    Args:
        args: With exactly two items they are read as `(target_tz, timestamp)`; any
            other length is delegated to `exp.ConvertTimezone.from_arg_list`.
        default_source_tz: Used as a string literal `source_tz` in the two-argument
            form; if falsy, `source_tz` is None.
    """
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True) -> exp.Trim:
    """Build an `exp.Trim` whose `position` is "LEADING" if `is_left`, else "TRAILING"."""
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    """Build an `exp.Coalesce` with the first argument as `this` and the rest as `expressions`.

    `is_nvl` and `is_null` are forwarded unchanged to the node.
    """
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List) -> exp.StrPosition:
    """Build an `exp.StrPosition` from arguments ordered `(substr, this, position)`."""
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


# Metaclass used by Parser: after creating the class it stores SHOW_TRIE and SET_TRIE,
# built with new_trie from the space-split keys of SHOW_PARSERS and SET_PARSERS.
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname,
bases, attrs) 174 175 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 176 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 177 178 return klass 179 180 181class Parser(metaclass=_Parser): 182 """ 183 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 184 185 Args: 186 error_level: The desired error level. 187 Default: ErrorLevel.IMMEDIATE 188 error_message_context: The amount of context to capture from a query string when displaying 189 the error message (in number of characters). 190 Default: 100 191 max_errors: Maximum number of error messages to include in a raised ParseError. 192 This is only relevant if error_level is ErrorLevel.RAISE. 193 Default: 3 194 """ 195 196 FUNCTIONS: t.Dict[str, t.Callable] = { 197 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 198 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 199 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 200 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 204 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 205 ), 206 "CHAR": lambda args: exp.Chr(expressions=args), 207 "CHR": lambda args: exp.Chr(expressions=args), 208 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 209 "CONCAT": lambda args, dialect: exp.Concat( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 215 expressions=args, 216 safe=not dialect.STRICT_STRING_CONCAT, 217 coalesce=dialect.CONCAT_COALESCE, 218 ), 219 "CONVERT_TIMEZONE": build_convert_timezone, 220 "DATE_TO_DATE_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 
to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 225 start=seq_get(args, 0), 226 end=seq_get(args, 1), 227 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 228 ), 229 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 230 "HEX": build_hex, 231 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 232 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 233 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 234 "LIKE": build_like, 235 "LOG": build_logarithm, 236 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 237 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 238 "LOWER": build_lower, 239 "LPAD": lambda args: build_pad(args), 240 "LEFTPAD": lambda args: build_pad(args), 241 "LTRIM": lambda args: build_trim(args), 242 "MOD": build_mod, 243 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 244 "RPAD": lambda args: build_pad(args, is_left=False), 245 "RTRIM": lambda args: build_trim(args, is_left=False), 246 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 247 if len(args) != 2 248 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 249 "STRPOS": exp.StrPosition.from_arg_list, 250 "CHARINDEX": lambda args: build_locate_strposition(args), 251 "INSTR": exp.StrPosition.from_arg_list, 252 "LOCATE": lambda args: build_locate_strposition(args), 253 "TIME_TO_TIME_STR": lambda args: exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 "TO_HEX": build_hex, 258 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 259 this=exp.Cast( 260 this=seq_get(args, 0), 261 to=exp.DataType(this=exp.DataType.Type.TEXT), 262 ), 263 start=exp.Literal.number(1), 264 length=exp.Literal.number(10), 265 ), 266 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 267 "UPPER": build_upper, 268 "VAR_MAP": build_var_map, 269 } 270 271 NO_PAREN_FUNCTIONS = { 272 TokenType.CURRENT_DATE: exp.CurrentDate, 273 TokenType.CURRENT_DATETIME: exp.CurrentDate, 274 TokenType.CURRENT_TIME: exp.CurrentTime, 275 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 276 TokenType.CURRENT_USER: exp.CurrentUser, 277 } 278 279 STRUCT_TYPE_TOKENS = { 280 TokenType.NESTED, 281 TokenType.OBJECT, 282 TokenType.STRUCT, 283 TokenType.UNION, 284 } 285 286 NESTED_TYPE_TOKENS = { 287 TokenType.ARRAY, 288 TokenType.LIST, 289 TokenType.LOWCARDINALITY, 290 TokenType.MAP, 291 TokenType.NULLABLE, 292 TokenType.RANGE, 293 *STRUCT_TYPE_TOKENS, 294 } 295 296 ENUM_TYPE_TOKENS = { 297 TokenType.DYNAMIC, 298 TokenType.ENUM, 299 TokenType.ENUM8, 300 TokenType.ENUM16, 301 } 302 303 AGGREGATE_TYPE_TOKENS = { 304 TokenType.AGGREGATEFUNCTION, 305 TokenType.SIMPLEAGGREGATEFUNCTION, 306 } 307 308 TYPE_TOKENS = { 309 TokenType.BIT, 310 TokenType.BOOLEAN, 311 TokenType.TINYINT, 312 TokenType.UTINYINT, 313 TokenType.SMALLINT, 314 TokenType.USMALLINT, 315 TokenType.INT, 316 TokenType.UINT, 317 TokenType.BIGINT, 318 TokenType.UBIGINT, 319 TokenType.INT128, 320 TokenType.UINT128, 321 TokenType.INT256, 322 TokenType.UINT256, 323 TokenType.MEDIUMINT, 324 TokenType.UMEDIUMINT, 325 TokenType.FIXEDSTRING, 326 TokenType.FLOAT, 327 TokenType.DOUBLE, 328 TokenType.UDOUBLE, 329 TokenType.CHAR, 330 TokenType.NCHAR, 331 TokenType.VARCHAR, 332 TokenType.NVARCHAR, 333 TokenType.BPCHAR, 334 TokenType.TEXT, 335 TokenType.MEDIUMTEXT, 336 TokenType.LONGTEXT, 337 TokenType.BLOB, 338 TokenType.MEDIUMBLOB, 339 TokenType.LONGBLOB, 340 TokenType.BINARY, 341 TokenType.VARBINARY, 342 TokenType.JSON, 343 TokenType.JSONB, 344 TokenType.INTERVAL, 345 TokenType.TINYBLOB, 346 TokenType.TINYTEXT, 347 TokenType.TIME, 348 TokenType.TIMETZ, 349 TokenType.TIMESTAMP, 350 TokenType.TIMESTAMP_S, 351 TokenType.TIMESTAMP_MS, 352 
TokenType.TIMESTAMP_NS, 353 TokenType.TIMESTAMPTZ, 354 TokenType.TIMESTAMPLTZ, 355 TokenType.TIMESTAMPNTZ, 356 TokenType.DATETIME, 357 TokenType.DATETIME2, 358 TokenType.DATETIME64, 359 TokenType.SMALLDATETIME, 360 TokenType.DATE, 361 TokenType.DATE32, 362 TokenType.INT4RANGE, 363 TokenType.INT4MULTIRANGE, 364 TokenType.INT8RANGE, 365 TokenType.INT8MULTIRANGE, 366 TokenType.NUMRANGE, 367 TokenType.NUMMULTIRANGE, 368 TokenType.TSRANGE, 369 TokenType.TSMULTIRANGE, 370 TokenType.TSTZRANGE, 371 TokenType.TSTZMULTIRANGE, 372 TokenType.DATERANGE, 373 TokenType.DATEMULTIRANGE, 374 TokenType.DECIMAL, 375 TokenType.DECIMAL32, 376 TokenType.DECIMAL64, 377 TokenType.DECIMAL128, 378 TokenType.DECIMAL256, 379 TokenType.UDECIMAL, 380 TokenType.BIGDECIMAL, 381 TokenType.UUID, 382 TokenType.GEOGRAPHY, 383 TokenType.GEOGRAPHYPOINT, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 
TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEMANTIC_VIEW, 456 TokenType.SEQUENCE, 457 TokenType.SINK, 458 TokenType.SOURCE, 459 TokenType.STAGE, 460 TokenType.STORAGE_INTEGRATION, 461 TokenType.STREAMLIT, 462 TokenType.TABLE, 463 TokenType.TAG, 464 TokenType.VIEW, 465 TokenType.WAREHOUSE, 466 } 467 468 CREATABLES = { 469 TokenType.COLUMN, 470 TokenType.CONSTRAINT, 471 TokenType.FOREIGN_KEY, 472 TokenType.FUNCTION, 473 TokenType.INDEX, 474 TokenType.PROCEDURE, 475 *DB_CREATABLES, 476 } 477 478 ALTERABLES = { 479 TokenType.INDEX, 480 TokenType.TABLE, 481 TokenType.VIEW, 482 } 483 484 # Tokens that can represent identifiers 485 ID_VAR_TOKENS = { 486 TokenType.ALL, 487 TokenType.ATTACH, 488 TokenType.VAR, 489 TokenType.ANTI, 490 TokenType.APPLY, 491 TokenType.ASC, 492 TokenType.ASOF, 493 TokenType.AUTO_INCREMENT, 494 TokenType.BEGIN, 495 TokenType.BPCHAR, 496 TokenType.CACHE, 497 TokenType.CASE, 498 TokenType.COLLATE, 499 TokenType.COMMAND, 500 TokenType.COMMENT, 501 TokenType.COMMIT, 502 TokenType.CONSTRAINT, 503 TokenType.COPY, 504 TokenType.CUBE, 505 TokenType.CURRENT_SCHEMA, 506 TokenType.DEFAULT, 507 TokenType.DELETE, 508 TokenType.DESC, 509 TokenType.DESCRIBE, 510 TokenType.DETACH, 511 TokenType.DICTIONARY, 512 TokenType.DIV, 513 TokenType.END, 514 TokenType.EXECUTE, 515 TokenType.EXPORT, 516 TokenType.ESCAPE, 517 TokenType.FALSE, 518 TokenType.FIRST, 519 TokenType.FILTER, 520 TokenType.FINAL, 521 TokenType.FORMAT, 522 
TokenType.FULL, 523 TokenType.GET, 524 TokenType.IDENTIFIER, 525 TokenType.IS, 526 TokenType.ISNULL, 527 TokenType.INTERVAL, 528 TokenType.KEEP, 529 TokenType.KILL, 530 TokenType.LEFT, 531 TokenType.LIMIT, 532 TokenType.LOAD, 533 TokenType.MERGE, 534 TokenType.NATURAL, 535 TokenType.NEXT, 536 TokenType.OFFSET, 537 TokenType.OPERATOR, 538 TokenType.ORDINALITY, 539 TokenType.OVERLAPS, 540 TokenType.OVERWRITE, 541 TokenType.PARTITION, 542 TokenType.PERCENT, 543 TokenType.PIVOT, 544 TokenType.PRAGMA, 545 TokenType.PUT, 546 TokenType.RANGE, 547 TokenType.RECURSIVE, 548 TokenType.REFERENCES, 549 TokenType.REFRESH, 550 TokenType.RENAME, 551 TokenType.REPLACE, 552 TokenType.RIGHT, 553 TokenType.ROLLUP, 554 TokenType.ROW, 555 TokenType.ROWS, 556 TokenType.SEMI, 557 TokenType.SET, 558 TokenType.SETTINGS, 559 TokenType.SHOW, 560 TokenType.TEMPORARY, 561 TokenType.TOP, 562 TokenType.TRUE, 563 TokenType.TRUNCATE, 564 TokenType.UNIQUE, 565 TokenType.UNNEST, 566 TokenType.UNPIVOT, 567 TokenType.UPDATE, 568 TokenType.USE, 569 TokenType.VOLATILE, 570 TokenType.WINDOW, 571 *CREATABLES, 572 *SUBQUERY_PREDICATES, 573 *TYPE_TOKENS, 574 *NO_PAREN_FUNCTIONS, 575 } 576 ID_VAR_TOKENS.remove(TokenType.UNION) 577 578 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 579 TokenType.ANTI, 580 TokenType.APPLY, 581 TokenType.ASOF, 582 TokenType.FULL, 583 TokenType.LEFT, 584 TokenType.LOCK, 585 TokenType.NATURAL, 586 TokenType.RIGHT, 587 TokenType.SEMI, 588 TokenType.WINDOW, 589 } 590 591 ALIAS_TOKENS = ID_VAR_TOKENS 592 593 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 594 595 ARRAY_CONSTRUCTORS = { 596 "ARRAY": exp.Array, 597 "LIST": exp.List, 598 } 599 600 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 601 602 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 603 604 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 605 606 FUNC_TOKENS = { 607 TokenType.COLLATE, 608 TokenType.COMMAND, 609 TokenType.CURRENT_DATE, 610 TokenType.CURRENT_DATETIME, 611 TokenType.CURRENT_SCHEMA, 612 
TokenType.CURRENT_TIMESTAMP, 613 TokenType.CURRENT_TIME, 614 TokenType.CURRENT_USER, 615 TokenType.FILTER, 616 TokenType.FIRST, 617 TokenType.FORMAT, 618 TokenType.GET, 619 TokenType.GLOB, 620 TokenType.IDENTIFIER, 621 TokenType.INDEX, 622 TokenType.ISNULL, 623 TokenType.ILIKE, 624 TokenType.INSERT, 625 TokenType.LIKE, 626 TokenType.MERGE, 627 TokenType.NEXT, 628 TokenType.OFFSET, 629 TokenType.PRIMARY_KEY, 630 TokenType.RANGE, 631 TokenType.REPLACE, 632 TokenType.RLIKE, 633 TokenType.ROW, 634 TokenType.UNNEST, 635 TokenType.VAR, 636 TokenType.LEFT, 637 TokenType.RIGHT, 638 TokenType.SEQUENCE, 639 TokenType.DATE, 640 TokenType.DATETIME, 641 TokenType.TABLE, 642 TokenType.TIMESTAMP, 643 TokenType.TIMESTAMPTZ, 644 TokenType.TRUNCATE, 645 TokenType.WINDOW, 646 TokenType.XOR, 647 *TYPE_TOKENS, 648 *SUBQUERY_PREDICATES, 649 } 650 651 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 652 TokenType.AND: exp.And, 653 } 654 655 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 656 TokenType.COLON_EQ: exp.PropertyEQ, 657 } 658 659 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 660 TokenType.OR: exp.Or, 661 } 662 663 EQUALITY = { 664 TokenType.EQ: exp.EQ, 665 TokenType.NEQ: exp.NEQ, 666 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 667 } 668 669 COMPARISON = { 670 TokenType.GT: exp.GT, 671 TokenType.GTE: exp.GTE, 672 TokenType.LT: exp.LT, 673 TokenType.LTE: exp.LTE, 674 } 675 676 BITWISE = { 677 TokenType.AMP: exp.BitwiseAnd, 678 TokenType.CARET: exp.BitwiseXor, 679 TokenType.PIPE: exp.BitwiseOr, 680 } 681 682 TERM = { 683 TokenType.DASH: exp.Sub, 684 TokenType.PLUS: exp.Add, 685 TokenType.MOD: exp.Mod, 686 TokenType.COLLATE: exp.Collate, 687 } 688 689 FACTOR = { 690 TokenType.DIV: exp.IntDiv, 691 TokenType.LR_ARROW: exp.Distance, 692 TokenType.SLASH: exp.Div, 693 TokenType.STAR: exp.Mul, 694 } 695 696 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 697 698 TIMES = { 699 TokenType.TIME, 700 TokenType.TIMETZ, 701 } 702 703 TIMESTAMPS = { 704 
TokenType.TIMESTAMP, 705 TokenType.TIMESTAMPNTZ, 706 TokenType.TIMESTAMPTZ, 707 TokenType.TIMESTAMPLTZ, 708 *TIMES, 709 } 710 711 SET_OPERATIONS = { 712 TokenType.UNION, 713 TokenType.INTERSECT, 714 TokenType.EXCEPT, 715 } 716 717 JOIN_METHODS = { 718 TokenType.ASOF, 719 TokenType.NATURAL, 720 TokenType.POSITIONAL, 721 } 722 723 JOIN_SIDES = { 724 TokenType.LEFT, 725 TokenType.RIGHT, 726 TokenType.FULL, 727 } 728 729 JOIN_KINDS = { 730 TokenType.ANTI, 731 TokenType.CROSS, 732 TokenType.INNER, 733 TokenType.OUTER, 734 TokenType.SEMI, 735 TokenType.STRAIGHT_JOIN, 736 } 737 738 JOIN_HINTS: t.Set[str] = set() 739 740 LAMBDAS = { 741 TokenType.ARROW: lambda self, expressions: self.expression( 742 exp.Lambda, 743 this=self._replace_lambda( 744 self._parse_assignment(), 745 expressions, 746 ), 747 expressions=expressions, 748 ), 749 TokenType.FARROW: lambda self, expressions: self.expression( 750 exp.Kwarg, 751 this=exp.var(expressions[0].name), 752 expression=self._parse_assignment(), 753 ), 754 } 755 756 COLUMN_OPERATORS = { 757 TokenType.DOT: None, 758 TokenType.DOTCOLON: lambda self, this, to: self.expression( 759 exp.JSONCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.DCOLON: lambda self, this, to: self.build_cast( 764 strict=self.STRICT_CAST, this=this, to=to 765 ), 766 TokenType.ARROW: lambda self, this, path: self.expression( 767 exp.JSONExtract, 768 this=this, 769 expression=self.dialect.to_json_path(path), 770 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 771 ), 772 TokenType.DARROW: lambda self, this, path: self.expression( 773 exp.JSONExtractScalar, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 779 exp.JSONBExtract, 780 this=this, 781 expression=path, 782 ), 783 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtractScalar, 785 this=this, 786 expression=path, 787 ), 788 
TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 789 exp.JSONBContains, 790 this=this, 791 expression=key, 792 ), 793 } 794 795 CAST_COLUMN_OPERATORS = { 796 TokenType.DOTCOLON, 797 TokenType.DCOLON, 798 } 799 800 EXPRESSION_PARSERS = { 801 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 802 exp.Column: lambda self: self._parse_column(), 803 exp.Condition: lambda self: self._parse_assignment(), 804 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 805 exp.Expression: lambda self: self._parse_expression(), 806 exp.From: lambda self: self._parse_from(joins=True), 807 exp.Group: lambda self: self._parse_group(), 808 exp.Having: lambda self: self._parse_having(), 809 exp.Hint: lambda self: self._parse_hint_body(), 810 exp.Identifier: lambda self: self._parse_id_var(), 811 exp.Join: lambda self: self._parse_join(), 812 exp.Lambda: lambda self: self._parse_lambda(), 813 exp.Lateral: lambda self: self._parse_lateral(), 814 exp.Limit: lambda self: self._parse_limit(), 815 exp.Offset: lambda self: self._parse_offset(), 816 exp.Order: lambda self: self._parse_order(), 817 exp.Ordered: lambda self: self._parse_ordered(), 818 exp.Properties: lambda self: self._parse_properties(), 819 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 820 exp.Qualify: lambda self: self._parse_qualify(), 821 exp.Returning: lambda self: self._parse_returning(), 822 exp.Select: lambda self: self._parse_select(), 823 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 824 exp.Table: lambda self: self._parse_table_parts(), 825 exp.TableAlias: lambda self: self._parse_table_alias(), 826 exp.Tuple: lambda self: self._parse_value(values=False), 827 exp.Whens: lambda self: self._parse_when_matched(), 828 exp.Where: lambda self: self._parse_where(), 829 exp.Window: lambda self: self._parse_named_window(), 830 exp.With: lambda self: self._parse_with(), 831 "JOIN_TYPE": lambda self: 
self._parse_join_parts(), 832 } 833 834 STATEMENT_PARSERS = { 835 TokenType.ALTER: lambda self: self._parse_alter(), 836 TokenType.ANALYZE: lambda self: self._parse_analyze(), 837 TokenType.BEGIN: lambda self: self._parse_transaction(), 838 TokenType.CACHE: lambda self: self._parse_cache(), 839 TokenType.COMMENT: lambda self: self._parse_comment(), 840 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 841 TokenType.COPY: lambda self: self._parse_copy(), 842 TokenType.CREATE: lambda self: self._parse_create(), 843 TokenType.DELETE: lambda self: self._parse_delete(), 844 TokenType.DESC: lambda self: self._parse_describe(), 845 TokenType.DESCRIBE: lambda self: self._parse_describe(), 846 TokenType.DROP: lambda self: self._parse_drop(), 847 TokenType.GRANT: lambda self: self._parse_grant(), 848 TokenType.INSERT: lambda self: self._parse_insert(), 849 TokenType.KILL: lambda self: self._parse_kill(), 850 TokenType.LOAD: lambda self: self._parse_load(), 851 TokenType.MERGE: lambda self: self._parse_merge(), 852 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 853 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 854 TokenType.REFRESH: lambda self: self._parse_refresh(), 855 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 856 TokenType.SET: lambda self: self._parse_set(), 857 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 858 TokenType.UNCACHE: lambda self: self._parse_uncache(), 859 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 860 TokenType.UPDATE: lambda self: self._parse_update(), 861 TokenType.USE: lambda self: self._parse_use(), 862 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 863 } 864 865 UNARY_PARSERS = { 866 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 867 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 868 TokenType.TILDA: lambda self: 
self.expression(exp.BitwiseNot, this=self._parse_unary()), 869 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 870 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 871 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 872 } 873 874 STRING_PARSERS = { 875 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 876 exp.RawString, this=token.text 877 ), 878 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 879 exp.National, this=token.text 880 ), 881 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 882 TokenType.STRING: lambda self, token: self.expression( 883 exp.Literal, this=token.text, is_string=True 884 ), 885 TokenType.UNICODE_STRING: lambda self, token: self.expression( 886 exp.UnicodeString, 887 this=token.text, 888 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 889 ), 890 } 891 892 NUMERIC_PARSERS = { 893 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 894 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 895 TokenType.HEX_STRING: lambda self, token: self.expression( 896 exp.HexString, 897 this=token.text, 898 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 899 ), 900 TokenType.NUMBER: lambda self, token: self.expression( 901 exp.Literal, this=token.text, is_string=False 902 ), 903 } 904 905 PRIMARY_PARSERS = { 906 **STRING_PARSERS, 907 **NUMERIC_PARSERS, 908 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 909 TokenType.NULL: lambda self, _: self.expression(exp.Null), 910 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 911 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 912 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 913 TokenType.STAR: lambda self, _: 
self._parse_star_ops(), 914 } 915 916 PLACEHOLDER_PARSERS = { 917 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 918 TokenType.PARAMETER: lambda self: self._parse_parameter(), 919 TokenType.COLON: lambda self: ( 920 self.expression(exp.Placeholder, this=self._prev.text) 921 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 922 else None 923 ), 924 } 925 926 RANGE_PARSERS = { 927 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 928 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 929 TokenType.GLOB: binary_range_parser(exp.Glob), 930 TokenType.ILIKE: binary_range_parser(exp.ILike), 931 TokenType.IN: lambda self, this: self._parse_in(this), 932 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 933 TokenType.IS: lambda self, this: self._parse_is(this), 934 TokenType.LIKE: binary_range_parser(exp.Like), 935 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 936 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 937 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 938 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 939 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 940 } 941 942 PIPE_SYNTAX_TRANSFORM_PARSERS = { 943 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 944 "AS": lambda self, query: self._build_pipe_cte( 945 query, [exp.Star()], self._parse_table_alias() 946 ), 947 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 948 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 949 "ORDER BY": lambda self, query: query.order_by( 950 self._parse_order(), append=False, copy=False 951 ), 952 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 953 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 954 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 955 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 956 
"WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 957 } 958 959 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 960 "ALLOWED_VALUES": lambda self: self.expression( 961 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 962 ), 963 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 964 "AUTO": lambda self: self._parse_auto_property(), 965 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 966 "BACKUP": lambda self: self.expression( 967 exp.BackupProperty, this=self._parse_var(any_token=True) 968 ), 969 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 970 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 971 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 972 "CHECKSUM": lambda self: self._parse_checksum(), 973 "CLUSTER BY": lambda self: self._parse_cluster(), 974 "CLUSTERED": lambda self: self._parse_clustered_by(), 975 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 976 exp.CollateProperty, **kwargs 977 ), 978 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 979 "CONTAINS": lambda self: self._parse_contains_property(), 980 "COPY": lambda self: self._parse_copy_property(), 981 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 982 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 983 "DEFINER": lambda self: self._parse_definer(), 984 "DETERMINISTIC": lambda self: self.expression( 985 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 986 ), 987 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 988 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 989 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 990 "DISTKEY": lambda self: self._parse_distkey(), 991 "DISTSTYLE": lambda self: 
self._parse_property_assignment(exp.DistStyleProperty), 992 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 993 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 994 "ENVIRONMENT": lambda self: self.expression( 995 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 996 ), 997 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 998 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 999 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1000 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1001 "FREESPACE": lambda self: self._parse_freespace(), 1002 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1003 "HEAP": lambda self: self.expression(exp.HeapProperty), 1004 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1005 "IMMUTABLE": lambda self: self.expression( 1006 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1007 ), 1008 "INHERITS": lambda self: self.expression( 1009 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1010 ), 1011 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1012 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1013 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1014 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1015 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1016 "LIKE": lambda self: self._parse_create_like(), 1017 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1018 "LOCK": lambda self: self._parse_locking(), 1019 "LOCKING": lambda self: self._parse_locking(), 1020 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1021 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1022 "MERGEBLOCKRATIO": lambda self, **kwargs: 
self._parse_mergeblockratio(**kwargs), 1023 "MODIFIES": lambda self: self._parse_modifies_property(), 1024 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1025 "NO": lambda self: self._parse_no_property(), 1026 "ON": lambda self: self._parse_on_property(), 1027 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1028 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1029 "PARTITION": lambda self: self._parse_partitioned_of(), 1030 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1031 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1032 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1033 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1034 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1035 "READS": lambda self: self._parse_reads_property(), 1036 "REMOTE": lambda self: self._parse_remote_with_connection(), 1037 "RETURNS": lambda self: self._parse_returns(), 1038 "STRICT": lambda self: self.expression(exp.StrictProperty), 1039 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1040 "ROW": lambda self: self._parse_row(), 1041 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1042 "SAMPLE": lambda self: self.expression( 1043 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1044 ), 1045 "SECURE": lambda self: self.expression(exp.SecureProperty), 1046 "SECURITY": lambda self: self._parse_security(), 1047 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1048 "SETTINGS": lambda self: self._parse_settings_property(), 1049 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1050 "SORTKEY": lambda self: self._parse_sortkey(), 1051 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1052 "STABLE": lambda self: self.expression( 1053 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1054 
), 1055 "STORED": lambda self: self._parse_stored(), 1056 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1057 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1058 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1059 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1060 "TO": lambda self: self._parse_to_table(), 1061 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1062 "TRANSFORM": lambda self: self.expression( 1063 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1064 ), 1065 "TTL": lambda self: self._parse_ttl(), 1066 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1067 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1068 "VOLATILE": lambda self: self._parse_volatile_property(), 1069 "WITH": lambda self: self._parse_with_property(), 1070 } 1071 1072 CONSTRAINT_PARSERS = { 1073 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1074 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1075 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1076 "CHARACTER SET": lambda self: self.expression( 1077 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1078 ), 1079 "CHECK": lambda self: self.expression( 1080 exp.CheckColumnConstraint, 1081 this=self._parse_wrapped(self._parse_assignment), 1082 enforced=self._match_text_seq("ENFORCED"), 1083 ), 1084 "COLLATE": lambda self: self.expression( 1085 exp.CollateColumnConstraint, 1086 this=self._parse_identifier() or self._parse_column(), 1087 ), 1088 "COMMENT": lambda self: self.expression( 1089 exp.CommentColumnConstraint, this=self._parse_string() 1090 ), 1091 "COMPRESS": lambda self: self._parse_compress(), 1092 "CLUSTERED": lambda self: self.expression( 1093 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1094 ), 1095 "NONCLUSTERED": lambda 
self: self.expression( 1096 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1097 ), 1098 "DEFAULT": lambda self: self.expression( 1099 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1100 ), 1101 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1102 "EPHEMERAL": lambda self: self.expression( 1103 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1104 ), 1105 "EXCLUDE": lambda self: self.expression( 1106 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1107 ), 1108 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1109 "FORMAT": lambda self: self.expression( 1110 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1111 ), 1112 "GENERATED": lambda self: self._parse_generated_as_identity(), 1113 "IDENTITY": lambda self: self._parse_auto_increment(), 1114 "INLINE": lambda self: self._parse_inline(), 1115 "LIKE": lambda self: self._parse_create_like(), 1116 "NOT": lambda self: self._parse_not_constraint(), 1117 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1118 "ON": lambda self: ( 1119 self._match(TokenType.UPDATE) 1120 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1121 ) 1122 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1123 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1124 "PERIOD": lambda self: self._parse_period_for_system_time(), 1125 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1126 "REFERENCES": lambda self: self._parse_references(match=False), 1127 "TITLE": lambda self: self.expression( 1128 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1129 ), 1130 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1131 "UNIQUE": lambda self: self._parse_unique(), 1132 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1133 
"WATERMARK": lambda self: self.expression( 1134 exp.WatermarkColumnConstraint, 1135 this=self._match(TokenType.FOR) and self._parse_column(), 1136 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1137 ), 1138 "WITH": lambda self: self.expression( 1139 exp.Properties, expressions=self._parse_wrapped_properties() 1140 ), 1141 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1142 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1143 } 1144 1145 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1146 if not self._match(TokenType.L_PAREN, advance=False): 1147 # Partitioning by bucket or truncate follows the syntax: 1148 # PARTITION BY (BUCKET(..) | TRUNCATE(..)) 1149 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1150 self._retreat(self._index - 1) 1151 return None 1152 1153 klass = ( 1154 exp.PartitionedByBucket 1155 if self._prev.text.upper() == "BUCKET" 1156 else exp.PartitionByTruncate 1157 ) 1158 1159 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1160 this, expression = seq_get(args, 0), seq_get(args, 1) 1161 1162 if isinstance(this, exp.Literal): 1163 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1164 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1165 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1166 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1167 # 1168 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1169 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1170 this, expression = expression, this 1171 1172 return self.expression(klass, 
this=this, expression=expression) 1173 1174 ALTER_PARSERS = { 1175 "ADD": lambda self: self._parse_alter_table_add(), 1176 "AS": lambda self: self._parse_select(), 1177 "ALTER": lambda self: self._parse_alter_table_alter(), 1178 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1179 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1180 "DROP": lambda self: self._parse_alter_table_drop(), 1181 "RENAME": lambda self: self._parse_alter_table_rename(), 1182 "SET": lambda self: self._parse_alter_table_set(), 1183 "SWAP": lambda self: self.expression( 1184 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1185 ), 1186 } 1187 1188 ALTER_ALTER_PARSERS = { 1189 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1190 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1191 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1192 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1193 } 1194 1195 SCHEMA_UNNAMED_CONSTRAINTS = { 1196 "CHECK", 1197 "EXCLUDE", 1198 "FOREIGN KEY", 1199 "LIKE", 1200 "PERIOD", 1201 "PRIMARY KEY", 1202 "UNIQUE", 1203 "WATERMARK", 1204 "BUCKET", 1205 "TRUNCATE", 1206 } 1207 1208 NO_PAREN_FUNCTION_PARSERS = { 1209 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1210 "CASE": lambda self: self._parse_case(), 1211 "CONNECT_BY_ROOT": lambda self: self.expression( 1212 exp.ConnectByRoot, this=self._parse_column() 1213 ), 1214 "IF": lambda self: self._parse_if(), 1215 } 1216 1217 INVALID_FUNC_NAME_TOKENS = { 1218 TokenType.IDENTIFIER, 1219 TokenType.STRING, 1220 } 1221 1222 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1223 1224 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1225 1226 FUNCTION_PARSERS = { 1227 **{ 1228 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1229 }, 1230 **{ 1231 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 
1232 }, 1233 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1234 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1235 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1236 "DECODE": lambda self: self._parse_decode(), 1237 "EXTRACT": lambda self: self._parse_extract(), 1238 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1239 "GAP_FILL": lambda self: self._parse_gap_fill(), 1240 "JSON_OBJECT": lambda self: self._parse_json_object(), 1241 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1242 "JSON_TABLE": lambda self: self._parse_json_table(), 1243 "MATCH": lambda self: self._parse_match_against(), 1244 "NORMALIZE": lambda self: self._parse_normalize(), 1245 "OPENJSON": lambda self: self._parse_open_json(), 1246 "OVERLAY": lambda self: self._parse_overlay(), 1247 "POSITION": lambda self: self._parse_position(), 1248 "PREDICT": lambda self: self._parse_predict(), 1249 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1250 "STRING_AGG": lambda self: self._parse_string_agg(), 1251 "SUBSTRING": lambda self: self._parse_substring(), 1252 "TRIM": lambda self: self._parse_trim(), 1253 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1254 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1255 "XMLELEMENT": lambda self: self.expression( 1256 exp.XMLElement, 1257 this=self._match_text_seq("NAME") and self._parse_id_var(), 1258 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1259 ), 1260 "XMLTABLE": lambda self: self._parse_xml_table(), 1261 } 1262 1263 QUERY_MODIFIER_PARSERS = { 1264 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1265 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1266 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1267 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1268 TokenType.HAVING: lambda self: ("having", self._parse_having()), 
1269 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1270 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1271 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1272 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1273 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1274 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1275 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1276 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1277 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1278 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1279 TokenType.CLUSTER_BY: lambda self: ( 1280 "cluster", 1281 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1282 ), 1283 TokenType.DISTRIBUTE_BY: lambda self: ( 1284 "distribute", 1285 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1286 ), 1287 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1288 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1289 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1290 } 1291 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1292 1293 SET_PARSERS = { 1294 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1295 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1296 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1297 "TRANSACTION": lambda self: self._parse_set_transaction(), 1298 } 1299 1300 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1301 1302 TYPE_LITERAL_PARSERS = { 1303 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1304 } 1305 1306 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1307 1308 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, 
TokenType.L_PAREN} 1309 1310 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1311 1312 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1313 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1314 "ISOLATION": ( 1315 ("LEVEL", "REPEATABLE", "READ"), 1316 ("LEVEL", "READ", "COMMITTED"), 1317 ("LEVEL", "READ", "UNCOMITTED"), 1318 ("LEVEL", "SERIALIZABLE"), 1319 ), 1320 "READ": ("WRITE", "ONLY"), 1321 } 1322 1323 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1324 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1325 ) 1326 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1327 1328 CREATE_SEQUENCE: OPTIONS_TYPE = { 1329 "SCALE": ("EXTEND", "NOEXTEND"), 1330 "SHARD": ("EXTEND", "NOEXTEND"), 1331 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1332 **dict.fromkeys( 1333 ( 1334 "SESSION", 1335 "GLOBAL", 1336 "KEEP", 1337 "NOKEEP", 1338 "ORDER", 1339 "NOORDER", 1340 "NOCACHE", 1341 "CYCLE", 1342 "NOCYCLE", 1343 "NOMINVALUE", 1344 "NOMAXVALUE", 1345 "NOSCALE", 1346 "NOSHARD", 1347 ), 1348 tuple(), 1349 ), 1350 } 1351 1352 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1353 1354 USABLES: OPTIONS_TYPE = dict.fromkeys( 1355 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1356 ) 1357 1358 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1359 1360 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1361 "TYPE": ("EVOLUTION",), 1362 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1363 } 1364 1365 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1366 1367 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1368 1369 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1370 "NOT": ("ENFORCED",), 1371 "MATCH": ( 1372 "FULL", 1373 "PARTIAL", 1374 "SIMPLE", 1375 ), 1376 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1377 "USING": ( 1378 "BTREE", 1379 "HASH", 1380 ), 1381 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1382 } 1383 1384 
WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1385 "NO": ("OTHERS",), 1386 "CURRENT": ("ROW",), 1387 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1388 } 1389 1390 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1391 1392 CLONE_KEYWORDS = {"CLONE", "COPY"} 1393 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1394 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1395 1396 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1397 1398 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1399 1400 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1401 1402 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1403 1404 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1405 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1406 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1407 1408 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1409 1410 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1411 1412 ADD_CONSTRAINT_TOKENS = { 1413 TokenType.CONSTRAINT, 1414 TokenType.FOREIGN_KEY, 1415 TokenType.INDEX, 1416 TokenType.KEY, 1417 TokenType.PRIMARY_KEY, 1418 TokenType.UNIQUE, 1419 } 1420 1421 DISTINCT_TOKENS = {TokenType.DISTINCT} 1422 1423 NULL_TOKENS = {TokenType.NULL} 1424 1425 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1426 1427 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1428 1429 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1430 1431 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1432 1433 ODBC_DATETIME_LITERALS = { 1434 "d": exp.Date, 1435 "t": exp.Time, 1436 "ts": exp.Timestamp, 1437 } 1438 1439 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1440 1441 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1442 1443 # The style options for the 
DESCRIBE statement 1444 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1445 1446 # The style options for the ANALYZE statement 1447 ANALYZE_STYLES = { 1448 "BUFFER_USAGE_LIMIT", 1449 "FULL", 1450 "LOCAL", 1451 "NO_WRITE_TO_BINLOG", 1452 "SAMPLE", 1453 "SKIP_LOCKED", 1454 "VERBOSE", 1455 } 1456 1457 ANALYZE_EXPRESSION_PARSERS = { 1458 "ALL": lambda self: self._parse_analyze_columns(), 1459 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1460 "DELETE": lambda self: self._parse_analyze_delete(), 1461 "DROP": lambda self: self._parse_analyze_histogram(), 1462 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1463 "LIST": lambda self: self._parse_analyze_list(), 1464 "PREDICATE": lambda self: self._parse_analyze_columns(), 1465 "UPDATE": lambda self: self._parse_analyze_histogram(), 1466 "VALIDATE": lambda self: self._parse_analyze_validate(), 1467 } 1468 1469 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1470 1471 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1472 1473 OPERATION_MODIFIERS: t.Set[str] = set() 1474 1475 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1476 1477 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1478 1479 STRICT_CAST = True 1480 1481 PREFIXED_PIVOT_COLUMNS = False 1482 IDENTIFY_PIVOT_STRINGS = False 1483 1484 LOG_DEFAULTS_TO_LN = False 1485 1486 # Whether the table sample clause expects CSV syntax 1487 TABLESAMPLE_CSV = False 1488 1489 # The default method used for table sampling 1490 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1491 1492 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1493 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1494 1495 # Whether the TRIM function expects the characters to trim as its first argument 1496 TRIM_PATTERN_FIRST = False 1497 1498 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1499 STRING_ALIASES = False 1500 1501 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1502 MODIFIERS_ATTACHED_TO_SET_OP = True 1503 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1504 1505 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1506 NO_PAREN_IF_COMMANDS = True 1507 1508 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1509 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1510 1511 # Whether the `:` operator is used to extract a value from a VARIANT column 1512 COLON_IS_VARIANT_EXTRACT = False 1513 1514 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1515 # If this is True and '(' is not found, the keyword will be treated as an identifier 1516 VALUES_FOLLOWED_BY_PAREN = True 1517 1518 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1519 SUPPORTS_IMPLICIT_UNNEST = False 1520 1521 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1522 INTERVAL_SPANS = True 1523 1524 # Whether a PARTITION clause can follow a table reference 1525 SUPPORTS_PARTITION_SELECTION = False 1526 1527 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1528 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1529 1530 # Whether the 'AS' keyword is optional in the CTE definition syntax 1531 OPTIONAL_ALIAS_TOKEN_CTE = True 1532 1533 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1534 ALTER_RENAME_REQUIRES_COLUMN = True 1535 1536 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 
1537 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1538 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1539 # as BigQuery, where all joins have the same precedence. 1540 JOINS_HAVE_EQUAL_PRECEDENCE = False 1541 1542 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1543 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1544 1545 # Whether map literals support arbitrary expressions as keys. 1546 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1547 # When False, keys are typically restricted to identifiers. 1548 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1549 1550 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1551 # is true for Snowflake but not for BigQuery which can also process strings 1552 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1553 1554 __slots__ = ( 1555 "error_level", 1556 "error_message_context", 1557 "max_errors", 1558 "dialect", 1559 "sql", 1560 "errors", 1561 "_tokens", 1562 "_index", 1563 "_curr", 1564 "_next", 1565 "_prev", 1566 "_prev_comments", 1567 "_pipe_cte_counter", 1568 ) 1569 1570 # Autofilled 1571 SHOW_TRIE: t.Dict = {} 1572 SET_TRIE: t.Dict = {} 1573 1574 def __init__( 1575 self, 1576 error_level: t.Optional[ErrorLevel] = None, 1577 error_message_context: int = 100, 1578 max_errors: int = 3, 1579 dialect: DialectType = None, 1580 ): 1581 from sqlglot.dialects import Dialect 1582 1583 self.error_level = error_level or ErrorLevel.IMMEDIATE 1584 self.error_message_context = error_message_context 1585 self.max_errors = max_errors 1586 self.dialect = Dialect.get_or_raise(dialect) 1587 self.reset() 1588 1589 def reset(self): 1590 self.sql = "" 1591 self.errors = [] 1592 self._tokens = [] 1593 self._index = 0 1594 self._curr = None 1595 self._next = None 1596 self._prev = None 1597 self._prev_comments = None 1598 self._pipe_cte_counter = 0 1599 1600 def 
parse( 1601 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1602 ) -> t.List[t.Optional[exp.Expression]]: 1603 """ 1604 Parses a list of tokens and returns a list of syntax trees, one tree 1605 per parsed SQL statement. 1606 1607 Args: 1608 raw_tokens: The list of tokens. 1609 sql: The original SQL string, used to produce helpful debug messages. 1610 1611 Returns: 1612 The list of the produced syntax trees. 1613 """ 1614 return self._parse( 1615 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1616 ) 1617 1618 def parse_into( 1619 self, 1620 expression_types: exp.IntoType, 1621 raw_tokens: t.List[Token], 1622 sql: t.Optional[str] = None, 1623 ) -> t.List[t.Optional[exp.Expression]]: 1624 """ 1625 Parses a list of tokens into a given Expression type. If a collection of Expression 1626 types is given instead, this method will try to parse the token list into each one 1627 of them, stopping at the first for which the parsing succeeds. 1628 1629 Args: 1630 expression_types: The expression type(s) to try and parse the token list into. 1631 raw_tokens: The list of tokens. 1632 sql: The original SQL string, used to produce helpful debug messages. 1633 1634 Returns: 1635 The target Expression. 
1636 """ 1637 errors = [] 1638 for expression_type in ensure_list(expression_types): 1639 parser = self.EXPRESSION_PARSERS.get(expression_type) 1640 if not parser: 1641 raise TypeError(f"No parser registered for {expression_type}") 1642 1643 try: 1644 return self._parse(parser, raw_tokens, sql) 1645 except ParseError as e: 1646 e.errors[0]["into_expression"] = expression_type 1647 errors.append(e) 1648 1649 raise ParseError( 1650 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1651 errors=merge_errors(errors), 1652 ) from errors[-1] 1653 1654 def _parse( 1655 self, 1656 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1657 raw_tokens: t.List[Token], 1658 sql: t.Optional[str] = None, 1659 ) -> t.List[t.Optional[exp.Expression]]: 1660 self.reset() 1661 self.sql = sql or "" 1662 1663 total = len(raw_tokens) 1664 chunks: t.List[t.List[Token]] = [[]] 1665 1666 for i, token in enumerate(raw_tokens): 1667 if token.token_type == TokenType.SEMICOLON: 1668 if token.comments: 1669 chunks.append([token]) 1670 1671 if i < total - 1: 1672 chunks.append([]) 1673 else: 1674 chunks[-1].append(token) 1675 1676 expressions = [] 1677 1678 for tokens in chunks: 1679 self._index = -1 1680 self._tokens = tokens 1681 self._advance() 1682 1683 expressions.append(parse_method(self)) 1684 1685 if self._index < len(self._tokens): 1686 self.raise_error("Invalid expression / Unexpected token") 1687 1688 self.check_errors() 1689 1690 return expressions 1691 1692 def check_errors(self) -> None: 1693 """Logs or raises any found errors, depending on the chosen error level setting.""" 1694 if self.error_level == ErrorLevel.WARN: 1695 for error in self.errors: 1696 logger.error(str(error)) 1697 elif self.error_level == ErrorLevel.RAISE and self.errors: 1698 raise ParseError( 1699 concat_messages(self.errors, self.max_errors), 1700 errors=merge_errors(self.errors), 1701 ) 1702 1703 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1704 """ 
1705 Appends an error in the list of recorded errors or raises it, depending on the chosen 1706 error level setting. 1707 """ 1708 token = token or self._curr or self._prev or Token.string("") 1709 start = token.start 1710 end = token.end + 1 1711 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1712 highlight = self.sql[start:end] 1713 end_context = self.sql[end : end + self.error_message_context] 1714 1715 error = ParseError.new( 1716 f"{message}. Line {token.line}, Col: {token.col}.\n" 1717 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1718 description=message, 1719 line=token.line, 1720 col=token.col, 1721 start_context=start_context, 1722 highlight=highlight, 1723 end_context=end_context, 1724 ) 1725 1726 if self.error_level == ErrorLevel.IMMEDIATE: 1727 raise error 1728 1729 self.errors.append(error) 1730 1731 def expression( 1732 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1733 ) -> E: 1734 """ 1735 Creates a new, validated Expression. 1736 1737 Args: 1738 exp_class: The expression class to instantiate. 1739 comments: An optional list of comments to attach to the expression. 1740 kwargs: The arguments to set for the expression along with their respective values. 1741 1742 Returns: 1743 The target expression. 1744 """ 1745 instance = exp_class(**kwargs) 1746 instance.add_comments(comments) if comments else self._add_comments(instance) 1747 return self.validate_expression(instance) 1748 1749 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1750 if expression and self._prev_comments: 1751 expression.add_comments(self._prev_comments) 1752 self._prev_comments = None 1753 1754 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1755 """ 1756 Validates an Expression, making sure that all its mandatory arguments are set. 1757 1758 Args: 1759 expression: The expression to validate. 
1760 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1761 1762 Returns: 1763 The validated expression. 1764 """ 1765 if self.error_level != ErrorLevel.IGNORE: 1766 for error_message in expression.error_messages(args): 1767 self.raise_error(error_message) 1768 1769 return expression 1770 1771 def _find_sql(self, start: Token, end: Token) -> str: 1772 return self.sql[start.start : end.end + 1] 1773 1774 def _is_connected(self) -> bool: 1775 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1776 1777 def _advance(self, times: int = 1) -> None: 1778 self._index += times 1779 self._curr = seq_get(self._tokens, self._index) 1780 self._next = seq_get(self._tokens, self._index + 1) 1781 1782 if self._index > 0: 1783 self._prev = self._tokens[self._index - 1] 1784 self._prev_comments = self._prev.comments 1785 else: 1786 self._prev = None 1787 self._prev_comments = None 1788 1789 def _retreat(self, index: int) -> None: 1790 if index != self._index: 1791 self._advance(index - self._index) 1792 1793 def _warn_unsupported(self) -> None: 1794 if len(self._tokens) <= 1: 1795 return 1796 1797 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1798 # interested in emitting a warning for the one being currently processed. 1799 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1800 1801 logger.warning( 1802 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1803 ) 1804 1805 def _parse_command(self) -> exp.Command: 1806 self._warn_unsupported() 1807 return self.expression( 1808 exp.Command, 1809 comments=self._prev_comments, 1810 this=self._prev.text.upper(), 1811 expression=self._parse_string(), 1812 ) 1813 1814 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1815 """ 1816 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1817 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1818 solve this by setting & resetting the parser state accordingly 1819 """ 1820 index = self._index 1821 error_level = self.error_level 1822 1823 self.error_level = ErrorLevel.IMMEDIATE 1824 try: 1825 this = parse_method() 1826 except ParseError: 1827 this = None 1828 finally: 1829 if not this or retreat: 1830 self._retreat(index) 1831 self.error_level = error_level 1832 1833 return this 1834 1835 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1836 start = self._prev 1837 exists = self._parse_exists() if allow_exists else None 1838 1839 self._match(TokenType.ON) 1840 1841 materialized = self._match_text_seq("MATERIALIZED") 1842 kind = self._match_set(self.CREATABLES) and self._prev 1843 if not kind: 1844 return self._parse_as_command(start) 1845 1846 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1847 this = self._parse_user_defined_function(kind=kind.token_type) 1848 elif kind.token_type == TokenType.TABLE: 1849 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1850 elif kind.token_type == TokenType.COLUMN: 1851 this = self._parse_column() 1852 else: 1853 this = self._parse_id_var() 1854 1855 self._match(TokenType.IS) 1856 1857 return self.expression( 1858 exp.Comment, 1859 this=this, 1860 kind=kind.text, 1861 expression=self._parse_string(), 1862 exists=exists, 1863 materialized=materialized, 1864 ) 1865 1866 def _parse_to_table( 1867 self, 1868 ) -> exp.ToTableProperty: 1869 table = self._parse_table_parts(schema=True) 1870 return self.expression(exp.ToTableProperty, this=table) 1871 1872 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1873 def _parse_ttl(self) -> exp.Expression: 1874 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1875 this = self._parse_bitwise() 1876 1877 if self._match_text_seq("DELETE"): 1878 return 
def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
    """
    Parses the remainder of a DROP statement, using the previously consumed token as its start.

    Args:
        exists: If truthy, it is used as the node's `exists` flag and no IF EXISTS clause
            is looked for.

    Returns:
        An exp.Drop node, or the result of `_parse_as_command` when the token following the
        optional TEMPORARY / MATERIALIZED modifiers is not one of CREATABLES.
    """
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")

    # Keep the kind token itself. `self._prev` moves on as soon as CONCURRENTLY or IF EXISTS
    # is consumed below, so reading it later (as was done for `is_db_reference`) inspects the
    # wrong token, e.g. for DROP SCHEMA IF EXISTS <name>.
    kind_token = self._match_set(self.CREATABLES) and self._prev
    if not kind_token:
        return self._parse_as_command(start)

    kind = kind_token.text.upper()

    concurrently = self._match_text_seq("CONCURRENTLY")
    if_exists = exists or self._parse_exists()

    if kind == "COLUMN":
        this = self._parse_column()
    else:
        this = self._parse_table_parts(
            schema=True, is_db_reference=kind_token.token_type == TokenType.SCHEMA
        )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    # Only peek at the parenthesis; _parse_wrapped_csv is left to consume it
    if self._match(TokenType.L_PAREN, advance=False):
        expressions = self._parse_wrapped_csv(self._parse_types)
    else:
        expressions = None

    # The trailing CASCADE / CONSTRAINTS / PURGE keywords are matched in this order
    return self.expression(
        exp.Drop,
        exists=if_exists,
        this=this,
        expressions=expressions,
        kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
        temporary=temporary,
        materialized=materialized,
        cascade=self._match_text_seq("CASCADE"),
        constraints=self._match_text_seq("CONSTRAINTS"),
        purge=self._match_text_seq("PURGE"),
        cluster=cluster,
        concurrently=concurrently,
    )
create_token = self._match_set(self.CREATABLES) and self._prev 2002 2003 if not create_token: 2004 # exp.Properties.Location.POST_CREATE 2005 properties = self._parse_properties() 2006 create_token = self._match_set(self.CREATABLES) and self._prev 2007 2008 if not properties or not create_token: 2009 return self._parse_as_command(start) 2010 2011 concurrently = self._match_text_seq("CONCURRENTLY") 2012 exists = self._parse_exists(not_=True) 2013 this = None 2014 expression: t.Optional[exp.Expression] = None 2015 indexes = None 2016 no_schema_binding = None 2017 begin = None 2018 end = None 2019 clone = None 2020 2021 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2022 nonlocal properties 2023 if properties and temp_props: 2024 properties.expressions.extend(temp_props.expressions) 2025 elif temp_props: 2026 properties = temp_props 2027 2028 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2029 this = self._parse_user_defined_function(kind=create_token.token_type) 2030 2031 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2032 extend_props(self._parse_properties()) 2033 2034 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2035 extend_props(self._parse_properties()) 2036 2037 if not expression: 2038 if self._match(TokenType.COMMAND): 2039 expression = self._parse_as_command(self._prev) 2040 else: 2041 begin = self._match(TokenType.BEGIN) 2042 return_ = self._match_text_seq("RETURN") 2043 2044 if self._match(TokenType.STRING, advance=False): 2045 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2046 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2047 expression = self._parse_string() 2048 extend_props(self._parse_properties()) 2049 else: 2050 expression = self._parse_user_defined_function_expression() 2051 2052 end = self._match_text_seq("END") 2053 2054 if return_: 
2055 expression = self.expression(exp.Return, this=expression) 2056 elif create_token.token_type == TokenType.INDEX: 2057 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2058 if not self._match(TokenType.ON): 2059 index = self._parse_id_var() 2060 anonymous = False 2061 else: 2062 index = None 2063 anonymous = True 2064 2065 this = self._parse_index(index=index, anonymous=anonymous) 2066 elif create_token.token_type in self.DB_CREATABLES: 2067 table_parts = self._parse_table_parts( 2068 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2069 ) 2070 2071 # exp.Properties.Location.POST_NAME 2072 self._match(TokenType.COMMA) 2073 extend_props(self._parse_properties(before=True)) 2074 2075 this = self._parse_schema(this=table_parts) 2076 2077 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2078 extend_props(self._parse_properties()) 2079 2080 has_alias = self._match(TokenType.ALIAS) 2081 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2082 # exp.Properties.Location.POST_ALIAS 2083 extend_props(self._parse_properties()) 2084 2085 if create_token.token_type == TokenType.SEQUENCE: 2086 expression = self._parse_types() 2087 extend_props(self._parse_properties()) 2088 else: 2089 expression = self._parse_ddl_select() 2090 2091 # Some dialects also support using a table as an alias instead of a SELECT. 2092 # Here we fallback to this as an alternative. 
def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
    """
    Parses a run of sequence options into a single exp.SequenceProperties node.

    Recognized options are INCREMENT [BY] [=], MINVALUE, MAXVALUE, START [WITH] [=], CACHE,
    OWNED BY, plus anything `_parse_var_from_options` accepts for CREATE_SEQUENCE. Options
    may be separated by commas.

    Returns:
        The populated node, or None when not a single token was consumed.
    """
    start_index = self._index
    sequence = exp.SequenceProperties()
    extra_options = []

    while self._curr:
        self._match(TokenType.COMMA)

        if self._match_text_seq("INCREMENT"):
            self._match_text_seq("BY")
            self._match_text_seq("=")
            sequence.set("increment", self._parse_term())
            continue

        if self._match_text_seq("MINVALUE"):
            sequence.set("minvalue", self._parse_term())
            continue

        if self._match_text_seq("MAXVALUE"):
            sequence.set("maxvalue", self._parse_term())
            continue

        if self._match(TokenType.START_WITH) or self._match_text_seq("START"):
            self._match_text_seq("=")
            sequence.set("start", self._parse_term())
            continue

        if self._match_text_seq("CACHE"):
            # T-SQL allows empty CACHE which is initialized dynamically
            sequence.set("cache", self._parse_number() or True)
            continue

        if self._match_text_seq("OWNED", "BY"):
            # "OWNED BY NONE" is the default
            if self._match_text_seq("NONE"):
                owner = None
            else:
                owner = self._parse_column()
            sequence.set("owned", owner)
            continue

        option = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
        if not option:
            break
        extra_options.append(option)

    sequence.set("options", extra_options or None)

    if self._index == start_index:
        return None
    return sequence
def _parse_property(self) -> t.Optional[exp.Expression]:
    """
    Parses a single property.

    The following are tried in order: a keyword registered in PROPERTY_PARSERS, DEFAULT
    followed by such a keyword, COMPOUND SORTKEY, SQL SECURITY, and finally a generic
    `key = value` pair. If no `=` follows the key, the cursor is rewound to where the key
    started and `_parse_sequence_properties` is tried instead.

    Returns:
        The parsed property, or whatever the delegated parser returned.
    """
    parsers = self.PROPERTY_PARSERS

    if self._match_texts(parsers):
        handler = parsers[self._prev.text.upper()]
        return handler(self)

    # NOTE(review): a DEFAULT that is not followed by a registered keyword looks like it
    # stays consumed, since the checkpoint below is taken afterwards - confirm intent.
    if self._match(TokenType.DEFAULT) and self._match_texts(parsers):
        handler = parsers[self._prev.text.upper()]
        return handler(self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("SQL", "SECURITY"):
        is_definer = self._match_text_seq("DEFINER")
        return self.expression(exp.SqlSecurityProperty, definer=is_definer)

    checkpoint = self._index
    key = self._parse_column()

    if not self._match(TokenType.EQ):
        self._retreat(checkpoint)
        return self._parse_sequence_properties()

    # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
    if isinstance(key, exp.Column):
        if len(key.parts) > 1:
            key = key.to_dot()
        else:
            key = exp.var(key.name)

    value = self._parse_bitwise()
    if not value:
        value = self._parse_var(any_token=True)

    # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
    if isinstance(value, exp.Column):
        value = exp.var(value.name)

    return self.expression(exp.Property, this=key, value=value)
def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """
    Parses consecutive properties until one fails to parse.

    Args:
        before: If truthy, each property is parsed with `_parse_property_before`;
            otherwise `_parse_property` is used.

    Returns:
        An exp.Properties node holding everything that was parsed, or None if nothing was.
    """
    # The strategy does not change between iterations, so choose it once
    parse_next = self._parse_property_before if before else self._parse_property

    collected = []
    while True:
        parsed = parse_next()
        if not parsed:
            break
        # A single parse step can hand back one property or a list of them
        collected.extend(ensure_list(parsed))

    if not collected:
        return None

    return self.expression(exp.Properties, expressions=collected)
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """
    Parses the value of a SYSTEM_VERSIONING property: [=] {OFF | [ON] [( option [, ...] )]}.

    Recognized options are HISTORY_TABLE, DATA_CONSISTENCY_CHECK and
    HISTORY_RETENTION_PERIOD, each followed by `=` and its value.

    Args:
        with_: Stored under the node's "with" arg.

    Returns:
        The populated exp.WithSystemVersioningProperty node.
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        # "with" is a Python keyword, hence the dict unpacking instead of plain kwargs
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            # Nothing above consumed a token, so the current one is neither a known option,
            # a comma nor the closing paren. Without this guard the loop would spin forever
            # on it (assuming the _match* helpers leave the cursor alone when they fail).
            # raise_error may return rather than raise, hence the explicit break.
            if self._index == index:
                self.raise_error("Unexpected token in SYSTEM_VERSIONING options")
                break

    return prop
def _parse_distributed_property(self) -> exp.DistributedByProperty:
    """
    Parses [BY HASH (<ids>) | BY RANDOM] [BUCKETS {AUTO | <number>}] followed by an
    optional ordering, as handled by `_parse_order`.

    Returns:
        An exp.DistributedByProperty whose kind is "HASH" unless BY RANDOM was matched.
    """
    distribution = "HASH"
    hash_columns: t.Optional[t.List[exp.Expression]] = None

    if self._match_text_seq("BY", "HASH"):
        hash_columns = self._parse_wrapped_csv(self._parse_id_var)
    elif self._match_text_seq("BY", "RANDOM"):
        distribution = "RANDOM"

    # If the BUCKETS keyword is not present, the number of buckets is AUTO
    bucket_count: t.Optional[exp.Expression] = None
    if self._match_text_seq("BUCKETS"):
        if not self._match_text_seq("AUTO"):
            bucket_count = self._parse_number()

    ordering = self._parse_order()

    return self.expression(
        exp.DistributedByProperty,
        expressions=hash_columns,
        kind=distribution,
        buckets=bucket_count,
        order=ordering,
    )
# https://dev.mysql.com/doc/refman/8.0/en/create-view.html
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """
    Parses the value of a DEFINER property: [=] <user> [PARAMETER token] <host>.

    Returns:
        An exp.DefinerProperty whose `this` is the string "<user>@<host>", or None if either
        part could not be parsed.
    """
    self._match(TokenType.EQ)

    user = self._parse_id_var()

    # Presumably the `@` between user and host - confirm against the tokenizer
    self._match(TokenType.PARAMETER)

    host = self._parse_id_var()
    if not host and self._match(TokenType.MOD):
        # The host may be given as a bare MOD token instead of an identifier
        host = self._prev.text

    if not (user and host):
        return None

    return exp.DefinerProperty(this=f"{user}@{host}")
def _parse_clustered_by(self) -> exp.ClusteredByProperty:
    """
    Parses [BY] (<columns>) [SORTED BY (<ordered exprs>)] [INTO] <number> [BUCKETS].

    Returns:
        An exp.ClusteredByProperty carrying the columns, the optional sort spec and the
        bucket count.
    """

    def _parenthesized_csv(parse_item: t.Callable) -> t.List:
        # Comma-separated items between the parentheses handled by _match_l_paren / _match_r_paren
        self._match_l_paren()
        items = self._parse_csv(parse_item)
        self._match_r_paren()
        return items

    self._match_text_seq("BY")
    columns = _parenthesized_csv(self._parse_column)

    sort_spec = None
    if self._match_text_seq("SORTED", "BY"):
        sort_spec = _parenthesized_csv(self._parse_ordered)

    self._match(TokenType.INTO)
    bucket_count = self._parse_number()
    self._match_text_seq("BUCKETS")

    return self.expression(
        exp.ClusteredByProperty,
        expressions=columns,
        sorted_by=sort_spec,
        buckets=bucket_count,
    )
def _parse_locking(self) -> exp.LockingProperty:
    """
    Parses a locking clause:
    [TABLE | VIEW | ROW | DATABASE] [<name>] [FOR | IN] [<lock type>] [OVERRIDE].

    A name is only parsed for the DATABASE, TABLE and VIEW kinds. EXCL is normalized to
    EXCLUSIVE. Every part is optional and left as None when absent.

    Returns:
        The resulting exp.LockingProperty node.
    """
    # Object kind: the token-based kinds are probed first, then the DATABASE keyword
    kind = None
    for token_type, label in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = label
            break
    else:
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    target = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = None
    for token_type, label in ((TokenType.FOR, "FOR"), (TokenType.IN, "IN")):
        if self._match(token_type):
            for_or_in = label
            break

    lock_type = None
    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        lock_type = "EXCLUSIVE"
    else:
        for keyword in ("SHARE", "READ", "WRITE", "CHECKSUM"):
            if self._match_text_seq(keyword):
                lock_type = keyword
                break

    override = self._match_text_seq("OVERRIDE")

    return self.expression(
        exp.LockingProperty,
        this=target,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=override,
    )
# https://www.postgresql.org/docs/current/sql-createtable.html
def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
    """
    Parses OF <parent table> {DEFAULT | FOR VALUES <partition bound spec>}.

    Returns:
        The exp.PartitionedOfProperty node, or None (after stepping back one token) when
        the next token is not OF.
    """
    if not self._match_text_seq("OF"):
        # Rewind one token - presumably un-consuming the keyword that dispatched to this
        # parser so that something else can handle it (TODO confirm against the caller)
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    # Bound before branching: raise_error does not necessarily raise (code elsewhere in this
    # parser carries on after calling it), and in that case the return below used to fail
    # with an UnboundLocalError instead of reporting the actual parse error.
    expression: t.Optional[exp.Var | exp.PartitionBoundSpec] = None

    if self._match(TokenType.DEFAULT):
        expression = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
    """
    Parses <table> followed by any number of {INCLUDING | EXCLUDING} <option> pairs.

    Each pair becomes an exp.Property whose `this` is the upper-cased keyword and whose
    value is a var holding the upper-cased option name.

    Returns:
        The exp.LikeProperty node, or None if a keyword is not followed by an identifier.
    """
    source_table = self._parse_table(schema=True)
    like_options = []

    while self._match_texts(("INCLUDING", "EXCLUDING")):
        keyword = self._prev.text.upper()

        option = self._parse_id_var()
        if not option:
            return None

        option_name = exp.var(option.this.upper())
        like_options.append(self.expression(exp.Property, this=keyword, value=option_name))

    return self.expression(exp.LikeProperty, this=source_table, expressions=like_options)
this=self._parse_var_or_string(), default=default 2783 ) 2784 2785 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2786 self._match_text_seq("WITH", "CONNECTION") 2787 return self.expression( 2788 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2789 ) 2790 2791 def _parse_returns(self) -> exp.ReturnsProperty: 2792 value: t.Optional[exp.Expression] 2793 null = None 2794 is_table = self._match(TokenType.TABLE) 2795 2796 if is_table: 2797 if self._match(TokenType.LT): 2798 value = self.expression( 2799 exp.Schema, 2800 this="TABLE", 2801 expressions=self._parse_csv(self._parse_struct_types), 2802 ) 2803 if not self._match(TokenType.GT): 2804 self.raise_error("Expecting >") 2805 else: 2806 value = self._parse_schema(exp.var("TABLE")) 2807 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2808 null = True 2809 value = None 2810 else: 2811 value = self._parse_types() 2812 2813 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2814 2815 def _parse_describe(self) -> exp.Describe: 2816 kind = self._match_set(self.CREATABLES) and self._prev.text 2817 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2818 if self._match(TokenType.DOT): 2819 style = None 2820 self._retreat(self._index - 2) 2821 2822 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2823 2824 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2825 this = self._parse_statement() 2826 else: 2827 this = self._parse_table(schema=True) 2828 2829 properties = self._parse_properties() 2830 expressions = properties.expressions if properties else None 2831 partition = self._parse_partition() 2832 return self.expression( 2833 exp.Describe, 2834 this=this, 2835 style=style, 2836 kind=kind, 2837 expressions=expressions, 2838 partition=partition, 2839 format=format, 2840 ) 2841 2842 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) 
def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
    """Parse an INSERT statement (presumably after the INSERT keyword was consumed).

    Three shapes are handled:

    * ``... DIRECTORY <path> [ROW FORMAT ...]`` - the target is an ``exp.Directory``.
    * ``FIRST`` / ``ALL`` - delegated to ``_parse_multitable_inserts``.
    * Everything else - the target is a table (or a function when the FUNCTION
      token is present), optionally aliased.

    Returns:
        An ``exp.Insert`` node, or whatever ``_parse_multitable_inserts`` returns
        for the FIRST / ALL form.
    """
    comments = []
    hint = self._parse_hint()
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None
    is_function = None
    target: t.Optional[exp.Expression]

    if self._match_text_seq("DIRECTORY"):
        directory_path = self._parse_var_or_string()
        row_format = self._parse_row_format(match_row=True)
        target = self.expression(
            exp.Directory,
            this=directory_path,
            local=local,
            row_format=row_format,
        )
    else:
        if self._match_set((TokenType.FIRST, TokenType.ALL)):
            comments.extend(ensure_list(self._prev_comments))
            return self._parse_multitable_inserts(comments)

        if self._match(TokenType.OR):
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        comments.extend(ensure_list(self._prev_comments))
        self._match(TokenType.TABLE)
        is_function = self._match(TokenType.FUNCTION)

        if is_function:
            target = self._parse_function()
        else:
            target = self._parse_table(schema=True, parse_partition=True)

        if isinstance(target, exp.Table) and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

    # RETURNING may appear here or after the conflict clause; both spots are tried.
    returning = self._parse_returning()

    # The clauses below consume tokens, so they are parsed strictly in this order.
    stored = self._match_text_seq("STORED") and self._parse_stored()
    by_name = self._match_text_seq("BY", "NAME")
    exists = self._parse_exists()
    where = self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment()
    partition = self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by()
    settings = self._match_text_seq("SETTINGS") and self._parse_settings_property()
    expression = self._parse_derived_table_values() or self._parse_ddl_select()
    conflict = self._parse_on_conflict()
    returning = returning or self._parse_returning()
    source = self._match(TokenType.TABLE) and self._parse_table()

    return self.expression(
        exp.Insert,
        comments=comments,
        hint=hint,
        is_function=is_function,
        this=target,
        stored=stored,
        by_name=by_name,
        exists=exists,
        where=where,
        partition=partition,
        settings=settings,
        expression=expression,
        conflict=conflict,
        returning=returning,
        overwrite=overwrite,
        alternative=alternative,
        ignore=ignore,
        source=source,
    )
def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
    """Parse an optional ``[WITH] SERDEPROPERTIES (...)`` clause.

    Args:
        with_: Whether the caller already saw the WITH keyword. When falsy, a
            leading WITH is matched here if present.

    Returns:
        An ``exp.SerdeProperties`` node, or None when the SERDE_PROPERTIES token
        does not follow; in that case the token position is restored.
    """
    start = self._index
    has_with = with_ or self._match_text_seq("WITH")

    if self._match(TokenType.SERDE_PROPERTIES):
        # "with" is a Python keyword, hence the dict-based keyword arguments.
        node_args = {
            "expressions": self._parse_wrapped_properties(),
            "with": has_with,
        }
        return self.expression(exp.SerdeProperties, **node_args)  # type: ignore

    # Undo a WITH that may have been consumed above.
    self._retreat(start)
    return None
def _parse_load(self) -> exp.LoadData | exp.Command:
    """Parse a LOAD statement.

    Only the ``LOAD DATA ...`` form is parsed structurally; anything else is
    handed to ``_parse_as_command`` together with the previous token.

    Returns:
        An ``exp.LoadData`` node for ``LOAD DATA``, otherwise the result of
        ``_parse_as_command``.
    """
    if not self._match_text_seq("DATA"):
        return self._parse_as_command(self._prev)

    local = self._match_text_seq("LOCAL")
    self._match_text_seq("INPATH")
    inpath = self._parse_string()
    overwrite = self._match(TokenType.OVERWRITE)
    self._match_pair(TokenType.INTO, TokenType.TABLE)

    # Token-consuming clauses, parsed in the order they are expected to appear.
    target = self._parse_table(schema=True)
    partition = self._parse_partition()
    input_format = self._match_text_seq("INPUTFORMAT") and self._parse_string()
    serde = self._match_text_seq("SERDE") and self._parse_string()

    return self.expression(
        exp.LoadData,
        this=target,
        local=local,
        overwrite=overwrite,
        inpath=inpath,
        partition=partition,
        input_format=input_format,
        serde=serde,
    )
def _parse_cache(self) -> exp.Cache:
    """Parse ``[LAZY] [TABLE] <table> [OPTIONS (<key> = <value>)] [AS] <select>``.

    Returns:
        An ``exp.Cache`` node. ``options`` is ``[key, value]`` when an OPTIONS
        clause is present and an empty list otherwise.
    """
    is_lazy = self._match_text_seq("LAZY")
    self._match(TokenType.TABLE)
    cached_table = self._parse_table(schema=True)

    if self._match_text_seq("OPTIONS"):
        # A single key = value pair of string literals is read.
        self._match_l_paren()
        option_key = self._parse_string()
        self._match(TokenType.EQ)
        option_value = self._parse_string()
        self._match_r_paren()
        options = [option_key, option_value]
    else:
        options = []

    self._match(TokenType.ALIAS)
    query = self._parse_select(nested=True)

    return self.expression(
        exp.Cache,
        this=cached_table,
        lazy=is_lazy,
        options=options,
        expression=query,
    )
def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
    """Parse one row of values into an ``exp.Tuple``.

    Args:
        values: Not read by this implementation.

    Returns:
        A tuple of the parenthesized, comma-separated items; or a one-element
        tuple wrapping a bare expression; or None when nothing could be parsed.
    """

    def _parse_item() -> t.Optional[exp.Expression]:
        # DEFAULT is accepted as an item only when the dialect supports it.
        if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
            return exp.var(self._prev.text.upper())
        return self._parse_expression()

    if not self._match(TokenType.L_PAREN):
        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        lone_expression = self._parse_expression()
        if not lone_expression:
            return None
        return self.expression(exp.Tuple, expressions=[lone_expression])

    items = self._parse_csv(_parse_item)
    self._match_r_paren()
    return self.expression(exp.Tuple, expressions=items)
def _parse_select_query(
    self,
    nested: bool = False,
    table: bool = False,
    parse_subquery_alias: bool = True,
    parse_set_operation: bool = True,
) -> t.Optional[exp.Expression]:
    """Parse a query-like construct starting at the current token.

    Handles, in this order: a WITH clause followed by a statement, a SELECT
    (optionally preceded by a leading FROM), a parenthesized query when ``table``
    or ``nested`` is set, VALUES, a bare leading FROM, SUMMARIZE, DESCRIBE and
    ``STREAM <function>``.

    Args:
        nested: Allow a parenthesized query at this position.
        table: Same as ``nested``; also forwarded to ``_parse_wrapped_select``.
        parse_subquery_alias: Forwarded to ``_parse_subquery`` as ``parse_alias``.
        parse_set_operation: Whether to pass the result through
            ``_parse_set_operations`` before returning it.

    Returns:
        The parsed expression, or None when no branch applies.
    """
    with_clause = self._parse_with()

    if with_clause:
        statement = self._parse_statement()

        if not statement:
            self.raise_error("Failed to parse any statement following CTE")
            return with_clause

        if "with" not in statement.arg_types:
            self.raise_error(f"{statement.key} does not support CTE")
            return with_clause

        statement.set("with", with_clause)
        return statement

    # duckdb supports leading with FROM x
    from_ = None
    if self._match(TokenType.FROM, advance=False):
        from_ = self._parse_from(consume_pipe=True)

    if self._match(TokenType.SELECT):
        select_comments = self._prev_comments

        hint = self._parse_hint()

        all_ = None
        distinct = None
        # ALL / DISTINCT are only looked for when the token after the current one
        # is not a dot.
        if self._next and self._next.token_type != TokenType.DOT:
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text.upper()
        )

        if distinct:
            distinct_on = None
            if self._match(TokenType.ON):
                distinct_on = self._parse_value(values=False)
            distinct = self.expression(exp.Distinct, on=distinct_on)

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        operation_modifiers = []
        while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
            operation_modifiers.append(exp.var(self._prev.text.upper()))

        limit = self._parse_limit(top=True)
        projections = self._parse_projections()

        query = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
            operation_modifiers=operation_modifiers or None,
        )
        query.comments = select_comments

        into = self._parse_into()
        if into:
            query.set("into", into)

        # A leading FROM wins; otherwise look for a trailing one.
        from_ = from_ or self._parse_from()
        if from_:
            query.set("from", from_)

        query = self._parse_query_modifiers(query)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        wrapped = self._parse_wrapped_select(table=table)

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        self._match_r_paren()
        return self._parse_subquery(wrapped, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES, advance=False):
        query = self._parse_derived_table_values()
    elif from_:
        query = exp.select("*").from_(from_.this, copy=False)
    elif self._match(TokenType.SUMMARIZE):
        summarize_table = self._match(TokenType.TABLE)
        summarized = self._parse_select() or self._parse_string() or self._parse_table()
        return self.expression(exp.Summarize, this=summarized, table=summarize_table)
    elif self._match(TokenType.DESCRIBE):
        query = self._parse_describe()
    elif self._match_text_seq("STREAM"):
        query = self._parse_function()
        if query:
            query = self.expression(exp.Stream, this=query)
        else:
            # No function followed STREAM: give the STREAM token back.
            self._retreat(self._index - 1)
    else:
        query = None

    if parse_set_operation:
        return self._parse_set_operations(query)
    return query
def _parse_table_alias(
    self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.TableAlias]:
    """Parse an optional table alias, with an optional parenthesized column list.

    Args:
        alias_tokens: Token types accepted as the alias identifier; falls back to
            ``self.TABLE_ALIAS_TOKENS`` when not given.

    Returns:
        An ``exp.TableAlias`` node, or None when neither an alias nor a column
        list is found (or when a LIMIT/OFFSET clause can be parsed instead).
    """
    # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
    # so this section tries to parse the clause version and if it fails, it treats the token
    # as an identifier (alias)
    if self._can_parse_limit_or_offset():
        return None

    has_alias_token = self._match(TokenType.ALIAS)
    accepted_tokens = alias_tokens or self.TABLE_ALIAS_TOKENS

    alias = self._parse_id_var(any_token=has_alias_token, tokens=accepted_tokens)
    if not alias:
        alias = self._parse_string_as_identifier()

    columns = None
    checkpoint = self._index
    if self._match(TokenType.L_PAREN):
        columns = self._parse_csv(self._parse_function_parameter)
        if columns:
            self._match_r_paren()
        else:
            # Nothing parsed inside the parenthesis: give it back to the caller.
            self._retreat(checkpoint)

    if not (alias or columns):
        return None

    table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

    # We bubble up comments from the Identifier to the TableAlias
    if isinstance(alias, exp.Identifier):
        table_alias.add_comments(alias.pop_comments())

    return table_alias
def _parse_hint_body(self) -> t.Optional[exp.Hint]:
    """Parse the contents of a hint into an ``exp.Hint`` node.

    Comma-separated groups of hint function calls / variables are read until a
    group comes back empty. If a ``ParseError`` is raised, or tokens are left
    over afterwards, the position is rewound and
    ``_parse_hint_fallback_to_string`` is used instead.

    Returns:
        The structured ``exp.Hint``, or the result of the string fallback.
    """
    start_index = self._index

    def _parse_single_hint() -> t.Optional[exp.Expression]:
        return self._parse_hint_function_call() or self._parse_var(upper=True)

    hints = []
    parse_failed = False
    try:
        while True:
            group = self._parse_csv(_parse_single_hint)
            # Stop on an empty list specifically (same test as a `[]` sentinel).
            if group == []:
                break
            hints.extend(group)
    except ParseError:
        parse_failed = True

    if parse_failed or self._curr:
        self._retreat(start_index)
        return self._parse_hint_fallback_to_string()

    return self.expression(exp.Hint, expressions=hints)
def _parse_from(
    self,
    joins: bool = False,
    skip_from_token: bool = False,
    consume_pipe: bool = False,
) -> t.Optional[exp.From]:
    """Parse a FROM clause.

    Args:
        joins: Forwarded to ``_parse_table``.
        skip_from_token: Set when the caller has already consumed the FROM token,
            so it is not matched again here.
        consume_pipe: Forwarded to ``_parse_table``.

    Returns:
        An ``exp.From`` node, or None when a FROM token was required but absent.
    """
    if not (skip_from_token or self._match(TokenType.FROM)):
        return None

    # Grab the comments before parsing the table moves the token cursor on.
    from_comments = self._prev_comments
    source = self._parse_table(joins=joins, consume_pipe=consume_pipe)

    return self.expression(exp.From, comments=from_comments, this=source)
def _parse_lateral(self) -> t.Optional[exp.Lateral]:
    """Parse ``CROSS APPLY``, ``OUTER APPLY`` or ``LATERAL [VIEW] [OUTER]``.

    ``cross_apply`` ends up truthy for CROSS APPLY, False for OUTER APPLY and
    None when neither pair matched; only the None case goes on to look for the
    LATERAL token.

    Returns:
        An ``exp.Lateral`` node, or None when none of the introducers is present.
    """
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
    if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
        cross_apply = False

    view = None
    outer = None

    if cross_apply is None:
        if not self._match(TokenType.LATERAL):
            return None
        source = self._parse_select(table=True)
        view = self._match(TokenType.VIEW)
        outer = self._match(TokenType.OUTER)
    else:
        source = self._parse_select(table=True)

    if not source:
        # Not a subquery: try UNNEST, a function call or an identifier, then
        # follow any dotted continuation.
        source = (
            self._parse_unnest()
            or self._parse_function()
            or self._parse_id_var(any_token=False)
        )

        while self._match(TokenType.DOT):
            member = self._parse_function() or self._parse_id_var(any_token=False)
            source = exp.Dot(this=source, expression=member)

    ordinality: t.Optional[bool] = None
    lateral_alias: t.Optional[exp.TableAlias]

    if view:
        alias_name = self._parse_id_var(any_token=False)
        if self._match(TokenType.ALIAS):
            alias_columns = self._parse_csv(self._parse_id_var)
        else:
            alias_columns = []
        lateral_alias = self.expression(
            exp.TableAlias, this=alias_name, columns=alias_columns
        )
    elif isinstance(source, (exp.Subquery, exp.Unnest)) and source.alias:
        # We move the alias from the lateral's child node to the lateral itself
        lateral_alias = source.args["alias"].pop()
    else:
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        lateral_alias = self._parse_table_alias()

    return self.expression(
        exp.Lateral,
        this=source,
        view=view,
        outer=outer,
        alias=lateral_alias,
        cross_apply=cross_apply,
        ordinality=ordinality,
    )
t.Optional[exp.Join]: 3773 if self._match(TokenType.COMMA): 3774 table = self._try_parse(self._parse_table) 3775 cross_join = self.expression(exp.Join, this=table) if table else None 3776 3777 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3778 cross_join.set("kind", "CROSS") 3779 3780 return cross_join 3781 3782 index = self._index 3783 method, side, kind = self._parse_join_parts() 3784 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3785 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3786 join_comments = self._prev_comments 3787 3788 if not skip_join_token and not join: 3789 self._retreat(index) 3790 kind = None 3791 method = None 3792 side = None 3793 3794 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3795 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3796 3797 if not skip_join_token and not join and not outer_apply and not cross_apply: 3798 return None 3799 3800 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3801 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3802 kwargs["expressions"] = self._parse_csv( 3803 lambda: self._parse_table(parse_bracket=parse_bracket) 3804 ) 3805 3806 if method: 3807 kwargs["method"] = method.text 3808 if side: 3809 kwargs["side"] = side.text 3810 if kind: 3811 kwargs["kind"] = kind.text 3812 if hint: 3813 kwargs["hint"] = hint 3814 3815 if self._match(TokenType.MATCH_CONDITION): 3816 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3817 3818 if self._match(TokenType.ON): 3819 kwargs["on"] = self._parse_assignment() 3820 elif self._match(TokenType.USING): 3821 kwargs["using"] = self._parse_using_identifiers() 3822 elif ( 3823 not (outer_apply or cross_apply) 3824 and not isinstance(kwargs["this"], exp.Unnest) 3825 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3826 ): 3827 index = 
def _parse_opclass(self) -> t.Optional[exp.Expression]:
    """Parse an expression optionally followed by an operator class name.

    The expression is returned unchanged when the next token is one of
    ``OPCLASS_FOLLOW_KEYWORDS`` or ``OPTYPE_FOLLOW_TOKENS`` (neither is
    consumed). Otherwise the following table parts are parsed and both are
    wrapped in an ``exp.Opclass`` node.
    """
    operand = self._parse_assignment()

    if self._match_texts(
        self.OPCLASS_FOLLOW_KEYWORDS, advance=False
    ) or self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
        return operand

    opclass_name = self._parse_table_parts()
    return self.expression(exp.Opclass, this=operand, expression=opclass_name)
def _parse_index(
    self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
) -> t.Optional[exp.Index]:
    """Parse an index definition into an ``exp.Index`` node.

    Args:
        index: An index name supplied by the caller. When given (or when
            ``anonymous`` is set) the optional ``ON`` / ``TABLE`` tokens are
            skipped and the target table is parsed next.
        anonymous: Treat the index as unnamed; handled like a supplied ``index``.

    Returns:
        The ``exp.Index`` node, or ``None`` when neither ``index`` nor
        ``anonymous`` was given and no ``INDEX`` token follows the optional
        ``UNIQUE`` / ``PRIMARY`` / ``AMP`` markers.
    """
    unique = primary = amp = table = None

    if not (index or anonymous):
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()
    else:
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)

    return self.expression(
        exp.Index,
        this=index,
        table=table,
        unique=unique,
        primary=primary,
        amp=amp,
        params=self._parse_index_params(),
    )
def _parse_table_parts(
    self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
) -> exp.Table:
    """Parse a dotted name (``catalog.db.table`` and deeper) into an ``exp.Table``.

    Args:
        schema: Forwarded to ``_parse_table_part`` for every name part.
        is_db_reference: The name refers to a database, so the parsed parts
            are shifted one slot up (table -> db, db -> catalog) and the
            table slot is left empty.
        wildcard: Allow a ``*`` directly attached to the last part; it is
            appended to the identifier, or becomes the identifier itself when
            the last part is empty.

    Returns:
        The table node, with ``changes``, ``when`` and ``pivots`` set when the
        corresponding clauses follow the name.
    """
    catalog = db = None
    table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

    while self._match(TokenType.DOT):
        if not catalog:
            catalog, db = db, table
            # "" used for tsql FROM a..b case
            table = self._parse_table_part(schema=schema) or ""
            continue

        # This allows nesting the table in arbitrarily many dot expressions if needed
        table = self.expression(
            exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
        )

    if (
        wildcard
        and self._is_connected()
        and (isinstance(table, exp.Identifier) or not table)
        and self._match(TokenType.STAR)
    ):
        if isinstance(table, exp.Identifier):
            table.args["this"] += "*"
        else:
            table = exp.Identifier(this="*")

    # We bubble up comments from the Identifier to the Table
    comments = table.pop_comments() if isinstance(table, exp.Expression) else None

    if is_db_reference:
        catalog, db, table = db, table, None
        if not db:
            self.raise_error(f"Expected database name but got {self._curr}")
    elif not table:
        self.raise_error(f"Expected table name but got {self._curr}")

    table_node = self.expression(
        exp.Table,
        comments=comments,
        this=table,
        db=db,
        catalog=catalog,
    )

    # Optional trailing clauses, tried in this fixed order.
    for arg_name, parse_clause in (
        ("changes", self._parse_changes),
        ("when", self._parse_historical_data),
        ("pivots", self._parse_pivots),
    ):
        clause = parse_clause()
        if clause:
            table_node.set(arg_name, clause)

    return table_node
def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a snapshot clause introduced by a TIMESTAMP/VERSION snapshot token.

    Returns ``None`` when neither snapshot token is next.  Otherwise the
    resulting ``exp.Version`` carries:

    * ``this``: ``"TIMESTAMP"`` or ``"VERSION"``;
    * ``kind``: the upper-cased ``FROM`` / ``BETWEEN`` token text,
      ``"CONTAINED IN"``, ``"ALL"`` or ``"AS OF"``;
    * ``expression``: a two-element tuple of bounds, a tuple of the
      parenthesised list, ``None`` for ``ALL``, or the parsed type/value for
      ``AS OF``.
    """
    snapshot = None
    for token_type, label in (
        (TokenType.TIMESTAMP_SNAPSHOT, "TIMESTAMP"),
        (TokenType.VERSION_SNAPSHOT, "VERSION"),
    ):
        if self._match(token_type):
            snapshot = label
            break

    if snapshot is None:
        return None

    expression: t.Optional[exp.Expression]
    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        lower = self._parse_bitwise()
        # The separator between the two bounds is optional here.
        self._match_texts(("TO", "AND"))
        upper = self._parse_bitwise()
        expression = self.expression(exp.Tuple, expressions=[lower, upper])
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        bounds = self._parse_wrapped_csv(self._parse_bitwise)
        expression = self.expression(exp.Tuple, expressions=bounds)
    elif self._match(TokenType.ALL):
        kind = "ALL"
        expression = None
    else:
        # "AS OF" is consumed if present but is also the fallback kind.
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        expression = self._parse_type()

    return self.expression(exp.Version, this=snapshot, expression=expression, kind=kind)
def _parse_changes(self) -> t.Optional[exp.Changes]:
    """Parse ``CHANGES ( INFORMATION => <value> )`` plus up to two historical-data clauses.

    Returns ``None`` when the ``CHANGES ( INFORMATION =>`` prefix does not
    match.  The first historical-data clause is stored as ``at_before`` and
    the second as ``end``; either may be ``None``.
    """
    if self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
        information = self._parse_var(any_token=True)
        self._match_r_paren()

        # Parsed in source order: the first clause is the start, the second the end.
        range_start = self._parse_historical_data()
        range_end = self._parse_historical_data()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=range_start,
            end=range_end,
        )

    return None
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a table sampling clause into an ``exp.TableSample`` node.

    Args:
        as_modifier: Also accept the ``USING SAMPLE`` spelling in addition to
            the ``TABLE_SAMPLE`` token.

    Returns:
        ``None`` when no sampling clause starts at the current token,
        otherwise the populated ``exp.TableSample``.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    # An optional method name may precede the (optionally parenthesised) sample spec.
    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # BUCKET <numerator> [OUT OF] <denominator> [ON] <field>
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        # A bare number is a percentage in dialects with TABLESAMPLE_SIZE_IS_PERCENT.
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # Trailing "(<method> [, <seed>])" form; it overrides any leading method.
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )
def _parse_pivot_in(self) -> exp.In:
    """Parse one ``<column> IN (...)`` field of a PIVOT/UNPIVOT clause.

    The parenthesised list is either ``ANY [ORDER BY ...]`` (stored as a
    single ``exp.PivotAny``) or a comma-separated list of values, each of
    which may carry an alias with or without the alias keyword.

    Raises:
        Reports "Expecting IN (" through ``raise_error`` when the column is
        not followed by ``IN (``.
    """

    def _parse_value_with_alias() -> t.Optional[exp.Expression]:
        value = self._parse_select_or_expression()

        # The alias keyword is optional; whatever parses next is the alias.
        self._match(TokenType.ALIAS)
        alias = self._parse_bitwise()
        if not alias:
            return value

        # An unqualified column used as an alias is reduced to its identifier.
        if isinstance(alias, exp.Column) and not alias.db:
            alias = alias.this
        return self.expression(exp.PivotAlias, this=value, alias=alias)

    pivot_column = self._parse_column()

    if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
        self.raise_error("Expecting IN (")

    in_values: t.List[exp.Expression]
    if self._match(TokenType.ANY):
        in_values = ensure_list(exp.PivotAny(this=self._parse_order()))
    else:
        in_values = self._parse_csv(_parse_value_with_alias)

    self._match_r_paren()
    return self.expression(exp.In, this=pivot_column, expressions=in_values)
https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4389 if self._match_text_seq("INCLUDE", "NULLS"): 4390 include_nulls = True 4391 elif self._match_text_seq("EXCLUDE", "NULLS"): 4392 include_nulls = False 4393 else: 4394 return None 4395 4396 expressions = [] 4397 4398 if not self._match(TokenType.L_PAREN): 4399 self._retreat(index) 4400 return None 4401 4402 if unpivot: 4403 expressions = self._parse_csv(self._parse_column) 4404 else: 4405 expressions = self._parse_csv(self._parse_pivot_aggregation) 4406 4407 if not expressions: 4408 self.raise_error("Failed to parse PIVOT's aggregation list") 4409 4410 if not self._match(TokenType.FOR): 4411 self.raise_error("Expecting FOR") 4412 4413 fields = [] 4414 while True: 4415 field = self._try_parse(self._parse_pivot_in) 4416 if not field: 4417 break 4418 fields.append(field) 4419 4420 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4421 self._parse_bitwise 4422 ) 4423 4424 group = self._parse_group() 4425 4426 self._match_r_paren() 4427 4428 pivot = self.expression( 4429 exp.Pivot, 4430 expressions=expressions, 4431 fields=fields, 4432 unpivot=unpivot, 4433 include_nulls=include_nulls, 4434 default_on_null=default_on_null, 4435 group=group, 4436 ) 4437 4438 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4439 pivot.set("alias", self._parse_table_alias()) 4440 4441 if not unpivot: 4442 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4443 4444 columns: t.List[exp.Expression] = [] 4445 all_fields = [] 4446 for pivot_field in pivot.fields: 4447 pivot_field_expressions = pivot_field.expressions 4448 4449 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
4450 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4451 continue 4452 4453 all_fields.append( 4454 [ 4455 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4456 for fld in pivot_field_expressions 4457 ] 4458 ) 4459 4460 if all_fields: 4461 if names: 4462 all_fields.append(names) 4463 4464 # Generate all possible combinations of the pivot columns 4465 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4466 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4467 for fld_parts_tuple in itertools.product(*all_fields): 4468 fld_parts = list(fld_parts_tuple) 4469 4470 if names and self.PREFIXED_PIVOT_COLUMNS: 4471 # Move the "name" to the front of the list 4472 fld_parts.insert(0, fld_parts.pop(-1)) 4473 4474 columns.append(exp.to_identifier("_".join(fld_parts))) 4475 4476 pivot.set("columns", columns) 4477 4478 return pivot 4479 4480 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4481 return [agg.alias for agg in aggregations if agg.alias] 4482 4483 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4484 if not skip_where_token and not self._match(TokenType.PREWHERE): 4485 return None 4486 4487 return self.expression( 4488 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4489 ) 4490 4491 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4492 if not skip_where_token and not self._match(TokenType.WHERE): 4493 return None 4494 4495 return self.expression( 4496 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4497 ) 4498 4499 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4500 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4501 return None 4502 comments = self._prev_comments 4503 4504 elements: t.Dict[str, t.Any] = defaultdict(list) 4505 4506 if self._match(TokenType.ALL): 4507 
def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
    """Parse one grouping set: a single column or a parenthesised column list.

    A parenthesised list is returned as an ``exp.Tuple``; anything else is
    delegated to ``_parse_column``.
    """
    if not self._match(TokenType.L_PAREN):
        return self._parse_column()

    columns = self._parse_csv(self._parse_column)
    self._match_r_paren()
    return self.expression(exp.Tuple, expressions=columns)
skip_having_token: bool = False) -> t.Optional[exp.Having]: 4569 if not skip_having_token and not self._match(TokenType.HAVING): 4570 return None 4571 return self.expression( 4572 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4573 ) 4574 4575 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4576 if not self._match(TokenType.QUALIFY): 4577 return None 4578 return self.expression(exp.Qualify, this=self._parse_assignment()) 4579 4580 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4581 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4582 exp.Prior, this=self._parse_bitwise() 4583 ) 4584 connect = self._parse_assignment() 4585 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4586 return connect 4587 4588 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4589 if skip_start_token: 4590 start = None 4591 elif self._match(TokenType.START_WITH): 4592 start = self._parse_assignment() 4593 else: 4594 return None 4595 4596 self._match(TokenType.CONNECT_BY) 4597 nocycle = self._match_text_seq("NOCYCLE") 4598 connect = self._parse_connect_with_prior() 4599 4600 if not start and self._match(TokenType.START_WITH): 4601 start = self._parse_assignment() 4602 4603 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4604 4605 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4606 this = self._parse_id_var(any_token=True) 4607 if self._match(TokenType.ALIAS): 4608 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4609 return this 4610 4611 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4612 if self._match_text_seq("INTERPOLATE"): 4613 return self._parse_wrapped_csv(self._parse_name_as_expression) 4614 return None 4615 4616 def _parse_order( 4617 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4618 ) -> t.Optional[exp.Expression]: 4619 siblings = None 4620 if 
def _parse_ordered(
    self, parse_method: t.Optional[t.Callable] = None
) -> t.Optional[exp.Ordered]:
    """Parse one ORDER BY term with its direction, NULLS placement and WITH FILL.

    Args:
        parse_method: Parser for the ordered operand; ``_parse_assignment``
            is used when omitted.

    Returns:
        ``None`` when no operand could be parsed, otherwise an
        ``exp.Ordered``.  ``desc`` is truthy for ``DESC``, ``False`` for an
        explicit ``ASC`` and the falsy result of the failed match when no
        direction was written.
    """
    parse_operand = parse_method or self._parse_assignment
    this = parse_operand()
    if not this:
        return None

    dialect = self.dialect
    if this.name.upper() == "ALL" and dialect.SUPPORTS_ORDER_BY_ALL:
        this = exp.var("ALL")

    asc = self._match(TokenType.ASC)
    # `asc and False` turns an explicit ASC into False and otherwise keeps
    # the falsy value returned by the failed ASC match.
    desc = self._match(TokenType.DESC) or (asc and False)

    explicit_nulls_first = self._match_text_seq("NULLS", "FIRST")
    explicit_nulls_last = self._match_text_seq("NULLS", "LAST")

    nulls_first = explicit_nulls_first or False

    if not (explicit_nulls_first or explicit_nulls_last):
        # No explicit NULLS clause: derive the placement from the dialect.
        # Nulls come first when they sort as small values and the order is
        # ascending, or when they do not and the order is descending; never
        # when the dialect always puts nulls last.
        null_ordering = dialect.NULL_ORDERING
        nulls_are_small = null_ordering == "nulls_are_small"
        if null_ordering != "nulls_are_last" and bool(desc) != nulls_are_small:
            nulls_first = True

    with_fill = None
    if self._match_text_seq("WITH", "FILL"):
        fill_args = {
            "from": self._match(TokenType.FROM) and self._parse_bitwise(),
            "to": self._match_text_seq("TO") and self._parse_bitwise(),
            "step": self._match_text_seq("STEP") and self._parse_bitwise(),
            "interpolate": self._parse_interpolate(),
        }
        with_fill = self.expression(exp.WithFill, **fill_args)  # type: ignore

    return self.expression(
        exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
    )
self._match(TokenType.OFFSET): 4747 return this 4748 4749 count = self._parse_term() 4750 self._match_set((TokenType.ROW, TokenType.ROWS)) 4751 4752 return self.expression( 4753 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4754 ) 4755 4756 def _can_parse_limit_or_offset(self) -> bool: 4757 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4758 return False 4759 4760 index = self._index 4761 result = bool( 4762 self._try_parse(self._parse_limit, retreat=True) 4763 or self._try_parse(self._parse_offset, retreat=True) 4764 ) 4765 self._retreat(index) 4766 return result 4767 4768 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4769 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4770 4771 def _parse_locks(self) -> t.List[exp.Lock]: 4772 locks = [] 4773 while True: 4774 update, key = None, None 4775 if self._match_text_seq("FOR", "UPDATE"): 4776 update = True 4777 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4778 "LOCK", "IN", "SHARE", "MODE" 4779 ): 4780 update = False 4781 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4782 update, key = False, True 4783 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4784 update, key = True, True 4785 else: 4786 break 4787 4788 expressions = None 4789 if self._match_text_seq("OF"): 4790 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4791 4792 wait: t.Optional[bool | exp.Expression] = None 4793 if self._match_text_seq("NOWAIT"): 4794 wait = True 4795 elif self._match_text_seq("WAIT"): 4796 wait = self._parse_primary() 4797 elif self._match_text_seq("SKIP", "LOCKED"): 4798 wait = False 4799 4800 locks.append( 4801 self.expression( 4802 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4803 ) 4804 ) 4805 4806 return locks 4807 4808 def parse_set_operation( 4809 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4810 ) -> t.Optional[exp.Expression]: 4811 
def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Fold any chain of set operations that follows ``this`` into a left-deep tree.

    When ``MODIFIERS_ATTACHED_TO_SET_OP`` is set, the modifiers named in
    ``SET_OP_MODIFIERS`` that were parsed onto the right-hand operand of the
    outermost set operation are moved onto the set operation itself.
    """
    while this:
        combined = self.parse_set_operation(this)
        if not combined:
            break
        this = combined

    if not (isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP):
        return this

    right_operand = this.expression
    if right_operand:
        for modifier_name in self.SET_OP_MODIFIERS:
            modifier = right_operand.args.get(modifier_name)
            if modifier:
                # pop() detaches the modifier from the operand before re-attaching it.
                this.set(modifier_name, modifier.pop())

    return this
expression=exp.Null()) 4935 4936 # Postgres supports ISNULL and NOTNULL for conditions. 4937 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4938 if self._match(TokenType.NOTNULL): 4939 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4940 this = self.expression(exp.Not, this=this) 4941 4942 if negate: 4943 this = self._negate_range(this) 4944 4945 if self._match(TokenType.IS): 4946 this = self._parse_is(this) 4947 4948 return this 4949 4950 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4951 if not this: 4952 return this 4953 4954 return self.expression(exp.Not, this=this) 4955 4956 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4957 index = self._index - 1 4958 negate = self._match(TokenType.NOT) 4959 4960 if self._match_text_seq("DISTINCT", "FROM"): 4961 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4962 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4963 4964 if self._match(TokenType.JSON): 4965 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4966 4967 if self._match_text_seq("WITH"): 4968 _with = True 4969 elif self._match_text_seq("WITHOUT"): 4970 _with = False 4971 else: 4972 _with = None 4973 4974 unique = self._match(TokenType.UNIQUE) 4975 self._match_text_seq("KEYS") 4976 expression: t.Optional[exp.Expression] = self.expression( 4977 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4978 ) 4979 else: 4980 expression = self._parse_primary() or self._parse_null() 4981 if not expression: 4982 self._retreat(index) 4983 return None 4984 4985 this = self.expression(exp.Is, this=this, expression=expression) 4986 return self.expression(exp.Not, this=this) if negate else this 4987 4988 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4989 unnest = self._parse_unnest(with_alias=False) 4990 if unnest: 4991 this = self.expression(exp.In, 
this=this, unnest=unnest) 4992 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4993 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4994 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4995 4996 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4997 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4998 else: 4999 this = self.expression(exp.In, this=this, expressions=expressions) 5000 5001 if matched_l_paren: 5002 self._match_r_paren(this) 5003 elif not self._match(TokenType.R_BRACKET, expression=this): 5004 self.raise_error("Expecting ]") 5005 else: 5006 this = self.expression(exp.In, this=this, field=self._parse_column()) 5007 5008 return this 5009 5010 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5011 symmetric = None 5012 if self._match_text_seq("SYMMETRIC"): 5013 symmetric = True 5014 elif self._match_text_seq("ASYMMETRIC"): 5015 symmetric = False 5016 5017 low = self._parse_bitwise() 5018 self._match(TokenType.AND) 5019 high = self._parse_bitwise() 5020 5021 return self.expression( 5022 exp.Between, 5023 this=this, 5024 low=low, 5025 high=high, 5026 symmetric=symmetric, 5027 ) 5028 5029 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5030 if not self._match(TokenType.ESCAPE): 5031 return this 5032 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5033 5034 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5035 index = self._index 5036 5037 if not self._match(TokenType.INTERVAL) and match_interval: 5038 return None 5039 5040 if self._match(TokenType.STRING, advance=False): 5041 this = self._parse_primary() 5042 else: 5043 this = self._parse_term() 5044 5045 if not this or ( 5046 isinstance(this, exp.Column) 5047 and not this.table 5048 and not this.this.quoted 5049 and this.name.upper() == "IS" 5050 ): 
5051 self._retreat(index) 5052 return None 5053 5054 unit = self._parse_function() or ( 5055 not self._match(TokenType.ALIAS, advance=False) 5056 and self._parse_var(any_token=True, upper=True) 5057 ) 5058 5059 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5060 # each INTERVAL expression into this canonical form so it's easy to transpile 5061 if this and this.is_number: 5062 this = exp.Literal.string(this.to_py()) 5063 elif this and this.is_string: 5064 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5065 if parts and unit: 5066 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5067 unit = None 5068 self._retreat(self._index - 1) 5069 5070 if len(parts) == 1: 5071 this = exp.Literal.string(parts[0][0]) 5072 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5073 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5074 unit = self.expression( 5075 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5076 ) 5077 5078 interval = self.expression(exp.Interval, this=this, unit=unit) 5079 5080 index = self._index 5081 self._match(TokenType.PLUS) 5082 5083 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5084 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5085 return self.expression( 5086 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5087 ) 5088 5089 self._retreat(index) 5090 return interval 5091 5092 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5093 this = self._parse_term() 5094 5095 while True: 5096 if self._match_set(self.BITWISE): 5097 this = self.expression( 5098 self.BITWISE[self._prev.token_type], 5099 this=this, 5100 expression=self._parse_term(), 5101 ) 5102 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5103 this = self.expression( 5104 exp.DPipe, 5105 this=this, 5106 expression=self._parse_term(), 5107 safe=not self.dialect.STRICT_STRING_CONCAT, 5108 ) 5109 elif self._match(TokenType.DQMARK): 5110 this = self.expression( 5111 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5112 ) 5113 elif self._match_pair(TokenType.LT, TokenType.LT): 5114 this = self.expression( 5115 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5116 ) 5117 elif self._match_pair(TokenType.GT, TokenType.GT): 5118 this = self.expression( 5119 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5120 ) 5121 else: 5122 break 5123 5124 return this 5125 5126 def _parse_term(self) -> t.Optional[exp.Expression]: 5127 this = self._parse_factor() 5128 5129 while self._match_set(self.TERM): 5130 klass = self.TERM[self._prev.token_type] 5131 comments = self._prev_comments 5132 expression = self._parse_factor() 5133 5134 this = self.expression(klass, this=this, comments=comments, expression=expression) 5135 5136 if isinstance(this, exp.Collate): 5137 expr = this.expression 5138 5139 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5140 # fallback to Identifier / Var 5141 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5142 ident = expr.this 5143 if isinstance(ident, 
exp.Identifier): 5144 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5145 5146 return this 5147 5148 def _parse_factor(self) -> t.Optional[exp.Expression]: 5149 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5150 this = parse_method() 5151 5152 while self._match_set(self.FACTOR): 5153 klass = self.FACTOR[self._prev.token_type] 5154 comments = self._prev_comments 5155 expression = parse_method() 5156 5157 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5158 self._retreat(self._index - 1) 5159 return this 5160 5161 this = self.expression(klass, this=this, comments=comments, expression=expression) 5162 5163 if isinstance(this, exp.Div): 5164 this.args["typed"] = self.dialect.TYPED_DIVISION 5165 this.args["safe"] = self.dialect.SAFE_DIVISION 5166 5167 return this 5168 5169 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5170 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5171 5172 def _parse_unary(self) -> t.Optional[exp.Expression]: 5173 if self._match_set(self.UNARY_PARSERS): 5174 return self.UNARY_PARSERS[self._prev.token_type](self) 5175 return self._parse_at_time_zone(self._parse_type()) 5176 5177 def _parse_type( 5178 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5179 ) -> t.Optional[exp.Expression]: 5180 interval = parse_interval and self._parse_interval() 5181 if interval: 5182 return interval 5183 5184 index = self._index 5185 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5186 5187 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
5188 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5189 if isinstance(data_type, exp.Cast): 5190 # This constructor can contain ops directly after it, for instance struct unnesting: 5191 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5192 return self._parse_column_ops(data_type) 5193 5194 if data_type: 5195 index2 = self._index 5196 this = self._parse_primary() 5197 5198 if isinstance(this, exp.Literal): 5199 literal = this.name 5200 this = self._parse_column_ops(this) 5201 5202 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5203 if parser: 5204 return parser(self, this, data_type) 5205 5206 if ( 5207 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5208 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5209 and TIME_ZONE_RE.search(literal) 5210 ): 5211 data_type = exp.DataType.build("TIMESTAMPTZ") 5212 5213 return self.expression(exp.Cast, this=this, to=data_type) 5214 5215 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5216 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5217 # 5218 # If the index difference here is greater than 1, that means the parser itself must have 5219 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5220 # 5221 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5222 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5223 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5224 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5225 # 5226 # In these cases, we don't really want to return the converted type, but instead retreat 5227 # and try to parse a Column or Identifier in the section below. 
5228 if data_type.expressions and index2 - index > 1: 5229 self._retreat(index2) 5230 return self._parse_column_ops(data_type) 5231 5232 self._retreat(index) 5233 5234 if fallback_to_identifier: 5235 return self._parse_id_var() 5236 5237 this = self._parse_column() 5238 return this and self._parse_column_ops(this) 5239 5240 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5241 this = self._parse_type() 5242 if not this: 5243 return None 5244 5245 if isinstance(this, exp.Column) and not this.table: 5246 this = exp.var(this.name.upper()) 5247 5248 return self.expression( 5249 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5250 ) 5251 5252 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5253 type_name = identifier.name 5254 5255 while self._match(TokenType.DOT): 5256 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5257 5258 return exp.DataType.build(type_name, udt=True) 5259 5260 def _parse_types( 5261 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5262 ) -> t.Optional[exp.Expression]: 5263 index = self._index 5264 5265 this: t.Optional[exp.Expression] = None 5266 prefix = self._match_text_seq("SYSUDTLIB", ".") 5267 5268 if not self._match_set(self.TYPE_TOKENS): 5269 identifier = allow_identifiers and self._parse_id_var( 5270 any_token=False, tokens=(TokenType.VAR,) 5271 ) 5272 if isinstance(identifier, exp.Identifier): 5273 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5274 5275 if len(tokens) != 1: 5276 self.raise_error("Unexpected identifier", self._prev) 5277 5278 if tokens[0].token_type in self.TYPE_TOKENS: 5279 self._prev = tokens[0] 5280 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5281 this = self._parse_user_defined_type(identifier) 5282 else: 5283 self._retreat(self._index - 1) 5284 return None 5285 else: 5286 return None 5287 5288 type_token = self._prev.token_type 5289 5290 if type_token == 
TokenType.PSEUDO_TYPE: 5291 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5292 5293 if type_token == TokenType.OBJECT_IDENTIFIER: 5294 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5295 5296 # https://materialize.com/docs/sql/types/map/ 5297 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5298 key_type = self._parse_types( 5299 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5300 ) 5301 if not self._match(TokenType.FARROW): 5302 self._retreat(index) 5303 return None 5304 5305 value_type = self._parse_types( 5306 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5307 ) 5308 if not self._match(TokenType.R_BRACKET): 5309 self._retreat(index) 5310 return None 5311 5312 return exp.DataType( 5313 this=exp.DataType.Type.MAP, 5314 expressions=[key_type, value_type], 5315 nested=True, 5316 prefix=prefix, 5317 ) 5318 5319 nested = type_token in self.NESTED_TYPE_TOKENS 5320 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5321 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5322 expressions = None 5323 maybe_func = False 5324 5325 if self._match(TokenType.L_PAREN): 5326 if is_struct: 5327 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5328 elif nested: 5329 expressions = self._parse_csv( 5330 lambda: self._parse_types( 5331 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5332 ) 5333 ) 5334 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5335 this = expressions[0] 5336 this.set("nullable", True) 5337 self._match_r_paren() 5338 return this 5339 elif type_token in self.ENUM_TYPE_TOKENS: 5340 expressions = self._parse_csv(self._parse_equality) 5341 elif is_aggregate: 5342 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5343 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5344 ) 5345 if not func_or_ident: 5346 return None 5347 expressions = 
[func_or_ident] 5348 if self._match(TokenType.COMMA): 5349 expressions.extend( 5350 self._parse_csv( 5351 lambda: self._parse_types( 5352 check_func=check_func, 5353 schema=schema, 5354 allow_identifiers=allow_identifiers, 5355 ) 5356 ) 5357 ) 5358 else: 5359 expressions = self._parse_csv(self._parse_type_size) 5360 5361 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5362 if type_token == TokenType.VECTOR and len(expressions) == 2: 5363 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5364 5365 if not expressions or not self._match(TokenType.R_PAREN): 5366 self._retreat(index) 5367 return None 5368 5369 maybe_func = True 5370 5371 values: t.Optional[t.List[exp.Expression]] = None 5372 5373 if nested and self._match(TokenType.LT): 5374 if is_struct: 5375 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5376 else: 5377 expressions = self._parse_csv( 5378 lambda: self._parse_types( 5379 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5380 ) 5381 ) 5382 5383 if not self._match(TokenType.GT): 5384 self.raise_error("Expecting >") 5385 5386 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5387 values = self._parse_csv(self._parse_assignment) 5388 if not values and is_struct: 5389 values = None 5390 self._retreat(self._index - 1) 5391 else: 5392 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5393 5394 if type_token in self.TIMESTAMPS: 5395 if self._match_text_seq("WITH", "TIME", "ZONE"): 5396 maybe_func = False 5397 tz_type = ( 5398 exp.DataType.Type.TIMETZ 5399 if type_token in self.TIMES 5400 else exp.DataType.Type.TIMESTAMPTZ 5401 ) 5402 this = exp.DataType(this=tz_type, expressions=expressions) 5403 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5404 maybe_func = False 5405 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5406 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5407 
maybe_func = False 5408 elif type_token == TokenType.INTERVAL: 5409 unit = self._parse_var(upper=True) 5410 if unit: 5411 if self._match_text_seq("TO"): 5412 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5413 5414 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5415 else: 5416 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5417 elif type_token == TokenType.VOID: 5418 this = exp.DataType(this=exp.DataType.Type.NULL) 5419 5420 if maybe_func and check_func: 5421 index2 = self._index 5422 peek = self._parse_string() 5423 5424 if not peek: 5425 self._retreat(index) 5426 return None 5427 5428 self._retreat(index2) 5429 5430 if not this: 5431 if self._match_text_seq("UNSIGNED"): 5432 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5433 if not unsigned_type_token: 5434 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5435 5436 type_token = unsigned_type_token or type_token 5437 5438 this = exp.DataType( 5439 this=exp.DataType.Type[type_token.value], 5440 expressions=expressions, 5441 nested=nested, 5442 prefix=prefix, 5443 ) 5444 5445 # Empty arrays/structs are allowed 5446 if values is not None: 5447 cls = exp.Struct if is_struct else exp.Array 5448 this = exp.cast(cls(expressions=values), this, copy=False) 5449 5450 elif expressions: 5451 this.set("expressions", expressions) 5452 5453 # https://materialize.com/docs/sql/types/list/#type-name 5454 while self._match(TokenType.LIST): 5455 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5456 5457 index = self._index 5458 5459 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5460 matched_array = self._match(TokenType.ARRAY) 5461 5462 while self._curr: 5463 datatype_token = self._prev.token_type 5464 matched_l_bracket = self._match(TokenType.L_BRACKET) 5465 5466 if (not matched_l_bracket and not matched_array) or ( 5467 datatype_token == 
TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5468 ): 5469 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5470 # not to be confused with the fixed size array parsing 5471 break 5472 5473 matched_array = False 5474 values = self._parse_csv(self._parse_assignment) or None 5475 if ( 5476 values 5477 and not schema 5478 and ( 5479 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5480 ) 5481 ): 5482 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5483 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5484 self._retreat(index) 5485 break 5486 5487 this = exp.DataType( 5488 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5489 ) 5490 self._match(TokenType.R_BRACKET) 5491 5492 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5493 converter = self.TYPE_CONVERTERS.get(this.this) 5494 if converter: 5495 this = converter(t.cast(exp.DataType, this)) 5496 5497 return this 5498 5499 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5500 index = self._index 5501 5502 if ( 5503 self._curr 5504 and self._next 5505 and self._curr.token_type in self.TYPE_TOKENS 5506 and self._next.token_type in self.TYPE_TOKENS 5507 ): 5508 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5509 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5510 this = self._parse_id_var() 5511 else: 5512 this = ( 5513 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5514 or self._parse_id_var() 5515 ) 5516 5517 self._match(TokenType.COLON) 5518 5519 if ( 5520 type_required 5521 and not isinstance(this, exp.DataType) 5522 and not self._match_set(self.TYPE_TOKENS, advance=False) 5523 ): 5524 self._retreat(index) 5525 return self._parse_types() 5526 5527 return self._parse_column_def(this) 5528 5529 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5530 if not self._match_text_seq("AT", "TIME", "ZONE"): 5531 return this 5532 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5533 5534 def _parse_column(self) -> t.Optional[exp.Expression]: 5535 this = self._parse_column_reference() 5536 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5537 5538 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5539 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5540 5541 return column 5542 5543 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5544 this = self._parse_field() 5545 if ( 5546 not this 5547 and self._match(TokenType.VALUES, advance=False) 5548 and self.VALUES_FOLLOWED_BY_PAREN 5549 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5550 ): 5551 this = self._parse_id_var() 5552 5553 if isinstance(this, exp.Identifier): 5554 # We bubble up comments from the Identifier to the Column 5555 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5556 5557 return this 5558 5559 def _parse_colon_as_variant_extract( 5560 self, this: t.Optional[exp.Expression] 5561 ) -> t.Optional[exp.Expression]: 5562 casts = [] 5563 json_path = [] 5564 escape = None 5565 5566 while self._match(TokenType.COLON): 5567 start_index = self._index 5568 5569 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5570 path = self._parse_column_ops( 5571 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5572 ) 5573 5574 # The cast :: operator has a lower precedence than the extraction operator :, so 5575 # we rearrange the AST appropriately to avoid casting the JSON path 5576 while isinstance(path, exp.Cast): 5577 casts.append(path.to) 5578 path = path.this 5579 5580 if casts: 5581 dcolon_offset = next( 5582 i 5583 for i, t in enumerate(self._tokens[start_index:]) 5584 if t.token_type == TokenType.DCOLON 5585 ) 5586 end_token = self._tokens[start_index + dcolon_offset - 1] 5587 else: 5588 end_token = self._prev 5589 5590 if path: 5591 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5592 # it'll roundtrip to a string literal in GET_PATH 5593 if isinstance(path, exp.Identifier) and path.quoted: 5594 escape = True 5595 5596 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5597 5598 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5599 # Databricks transforms it back to the colon/dot notation 5600 if json_path: 5601 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5602 5603 if json_path_expr: 5604 json_path_expr.set("escape", escape) 5605 5606 this = self.expression( 5607 exp.JSONExtract, 5608 this=this, 5609 expression=json_path_expr, 5610 variant_extract=True, 5611 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5612 ) 5613 5614 while casts: 5615 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5616 5617 return this 5618 5619 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5620 return self._parse_types() 5621 5622 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5623 this = self._parse_bracket(this) 5624 5625 while self._match_set(self.COLUMN_OPERATORS): 5626 
op_token = self._prev.token_type 5627 op = self.COLUMN_OPERATORS.get(op_token) 5628 5629 if op_token in self.CAST_COLUMN_OPERATORS: 5630 field = self._parse_dcolon() 5631 if not field: 5632 self.raise_error("Expected type") 5633 elif op and self._curr: 5634 field = self._parse_column_reference() or self._parse_bracket() 5635 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5636 field = self._parse_column_ops(field) 5637 else: 5638 field = self._parse_field(any_token=True, anonymous_func=True) 5639 5640 # Function calls can be qualified, e.g., x.y.FOO() 5641 # This converts the final AST to a series of Dots leading to the function call 5642 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5643 if isinstance(field, (exp.Func, exp.Window)) and this: 5644 this = this.transform( 5645 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5646 ) 5647 5648 if op: 5649 this = op(self, this, field) 5650 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5651 this = self.expression( 5652 exp.Column, 5653 comments=this.comments, 5654 this=field, 5655 table=this.this, 5656 db=this.args.get("table"), 5657 catalog=this.args.get("db"), 5658 ) 5659 elif isinstance(field, exp.Window): 5660 # Move the exp.Dot's to the window's function 5661 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5662 field.set("this", window_func) 5663 this = field 5664 else: 5665 this = self.expression(exp.Dot, this=this, expression=field) 5666 5667 if field and field.comments: 5668 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5669 5670 this = self._parse_bracket(this) 5671 5672 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5673 5674 def _parse_paren(self) -> t.Optional[exp.Expression]: 5675 if not self._match(TokenType.L_PAREN): 5676 return None 5677 5678 comments = self._prev_comments 5679 
query = self._parse_select() 5680 5681 if query: 5682 expressions = [query] 5683 else: 5684 expressions = self._parse_expressions() 5685 5686 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5687 5688 if not this and self._match(TokenType.R_PAREN, advance=False): 5689 this = self.expression(exp.Tuple) 5690 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5691 this = self._parse_subquery(this=this, parse_alias=False) 5692 elif isinstance(this, exp.Subquery): 5693 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5694 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5695 this = self.expression(exp.Tuple, expressions=expressions) 5696 else: 5697 this = self.expression(exp.Paren, this=this) 5698 5699 if this: 5700 this.add_comments(comments) 5701 5702 self._match_r_paren(expression=this) 5703 return this 5704 5705 def _parse_primary(self) -> t.Optional[exp.Expression]: 5706 if self._match_set(self.PRIMARY_PARSERS): 5707 token_type = self._prev.token_type 5708 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5709 5710 if token_type == TokenType.STRING: 5711 expressions = [primary] 5712 while self._match(TokenType.STRING): 5713 expressions.append(exp.Literal.string(self._prev.text)) 5714 5715 if len(expressions) > 1: 5716 return self.expression(exp.Concat, expressions=expressions) 5717 5718 return primary 5719 5720 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5721 return exp.Literal.number(f"0.{self._prev.text}") 5722 5723 return self._parse_paren() 5724 5725 def _parse_field( 5726 self, 5727 any_token: bool = False, 5728 tokens: t.Optional[t.Collection[TokenType]] = None, 5729 anonymous_func: bool = False, 5730 ) -> t.Optional[exp.Expression]: 5731 if anonymous_func: 5732 field = ( 5733 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5734 or self._parse_primary() 5735 ) 5736 else: 5737 field = self._parse_primary() or self._parse_function( 5738 
anonymous=anonymous_func, any_token=any_token 5739 ) 5740 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5741 5742 def _parse_function( 5743 self, 5744 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5745 anonymous: bool = False, 5746 optional_parens: bool = True, 5747 any_token: bool = False, 5748 ) -> t.Optional[exp.Expression]: 5749 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5750 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5751 fn_syntax = False 5752 if ( 5753 self._match(TokenType.L_BRACE, advance=False) 5754 and self._next 5755 and self._next.text.upper() == "FN" 5756 ): 5757 self._advance(2) 5758 fn_syntax = True 5759 5760 func = self._parse_function_call( 5761 functions=functions, 5762 anonymous=anonymous, 5763 optional_parens=optional_parens, 5764 any_token=any_token, 5765 ) 5766 5767 if fn_syntax: 5768 self._match(TokenType.R_BRACE) 5769 5770 return func 5771 5772 def _parse_function_call( 5773 self, 5774 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5775 anonymous: bool = False, 5776 optional_parens: bool = True, 5777 any_token: bool = False, 5778 ) -> t.Optional[exp.Expression]: 5779 if not self._curr: 5780 return None 5781 5782 comments = self._curr.comments 5783 prev = self._prev 5784 token = self._curr 5785 token_type = self._curr.token_type 5786 this = self._curr.text 5787 upper = this.upper() 5788 5789 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5790 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5791 self._advance() 5792 return self._parse_window(parser(self)) 5793 5794 if not self._next or self._next.token_type != TokenType.L_PAREN: 5795 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5796 self._advance() 5797 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5798 5799 return None 5800 5801 if any_token: 5802 if token_type in self.RESERVED_TOKENS: 5803 return None 5804 elif 
token_type not in self.FUNC_TOKENS: 5805 return None 5806 5807 self._advance(2) 5808 5809 parser = self.FUNCTION_PARSERS.get(upper) 5810 if parser and not anonymous: 5811 this = parser(self) 5812 else: 5813 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5814 5815 if subquery_predicate: 5816 expr = None 5817 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5818 expr = self._parse_select() 5819 self._match_r_paren() 5820 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5821 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5822 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5823 self._advance(-1) 5824 expr = self._parse_bitwise() 5825 5826 if expr: 5827 return self.expression(subquery_predicate, comments=comments, this=expr) 5828 5829 if functions is None: 5830 functions = self.FUNCTIONS 5831 5832 function = functions.get(upper) 5833 known_function = function and not anonymous 5834 5835 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5836 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5837 5838 post_func_comments = self._curr and self._curr.comments 5839 if known_function and post_func_comments: 5840 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5841 # call we'll construct it as exp.Anonymous, even if it's "known" 5842 if any( 5843 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5844 for comment in post_func_comments 5845 ): 5846 known_function = False 5847 5848 if alias and known_function: 5849 args = self._kv_to_prop_eq(args) 5850 5851 if known_function: 5852 func_builder = t.cast(t.Callable, function) 5853 5854 if "dialect" in func_builder.__code__.co_varnames: 5855 func = func_builder(args, dialect=self.dialect) 5856 else: 5857 func = func_builder(args) 5858 5859 func = self.validate_expression(func, args) 5860 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5861 func.meta["name"] = 
this 5862 5863 this = func 5864 else: 5865 if token_type == TokenType.IDENTIFIER: 5866 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5867 5868 this = self.expression(exp.Anonymous, this=this, expressions=args) 5869 this = this.update_positions(token) 5870 5871 if isinstance(this, exp.Expression): 5872 this.add_comments(comments) 5873 5874 self._match_r_paren(this) 5875 return self._parse_window(this) 5876 5877 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5878 return expression 5879 5880 def _kv_to_prop_eq( 5881 self, expressions: t.List[exp.Expression], parse_map: bool = False 5882 ) -> t.List[exp.Expression]: 5883 transformed = [] 5884 5885 for index, e in enumerate(expressions): 5886 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5887 if isinstance(e, exp.Alias): 5888 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5889 5890 if not isinstance(e, exp.PropertyEQ): 5891 e = self.expression( 5892 exp.PropertyEQ, 5893 this=e.this if parse_map else exp.to_identifier(e.this.name), 5894 expression=e.expression, 5895 ) 5896 5897 if isinstance(e.this, exp.Column): 5898 e.this.replace(e.this.this) 5899 else: 5900 e = self._to_prop_eq(e, index) 5901 5902 transformed.append(e) 5903 5904 return transformed 5905 5906 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5907 return self._parse_statement() 5908 5909 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5910 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5911 5912 def _parse_user_defined_function( 5913 self, kind: t.Optional[TokenType] = None 5914 ) -> t.Optional[exp.Expression]: 5915 this = self._parse_table_parts(schema=True) 5916 5917 if not self._match(TokenType.L_PAREN): 5918 return this 5919 5920 expressions = self._parse_csv(self._parse_function_parameter) 5921 self._match_r_paren() 5922 return self.expression( 5923 exp.UserDefinedFunction, 
def _parse_session_parameter(self) -> exp.SessionParameter:
    """
    Parses a session parameter reference, optionally qualified as `<kind>.<name>`.

    The first part is an id/var or, failing that, a primary expression. If a dot follows,
    that first part's name becomes the `kind` and the parameter itself is parsed next.
    """
    name = self._parse_id_var() or self._parse_primary()

    if not (name and self._match(TokenType.DOT)):
        return self.expression(exp.SessionParameter, this=name, kind=None)

    qualifier = name.name
    name = self._parse_var() or self._parse_primary()
    return self.expression(exp.SessionParameter, this=name, kind=qualifier)
in INSERT INTO table (<expr>), 5983 # expr can be of both types 5984 if self._match_set(self.SELECT_START_TOKENS): 5985 self._retreat(index) 5986 return this 5987 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5988 self._match_r_paren() 5989 return self.expression(exp.Schema, this=this, expressions=args) 5990 5991 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5992 return self._parse_column_def(self._parse_field(any_token=True)) 5993 5994 def _parse_column_def( 5995 self, this: t.Optional[exp.Expression], computed_column: bool = True 5996 ) -> t.Optional[exp.Expression]: 5997 # column defs are not really columns, they're identifiers 5998 if isinstance(this, exp.Column): 5999 this = this.this 6000 6001 if not computed_column: 6002 self._match(TokenType.ALIAS) 6003 6004 kind = self._parse_types(schema=True) 6005 6006 if self._match_text_seq("FOR", "ORDINALITY"): 6007 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6008 6009 constraints: t.List[exp.Expression] = [] 6010 6011 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6012 ("ALIAS", "MATERIALIZED") 6013 ): 6014 persisted = self._prev.text.upper() == "MATERIALIZED" 6015 constraint_kind = exp.ComputedColumnConstraint( 6016 this=self._parse_assignment(), 6017 persisted=persisted or self._match_text_seq("PERSISTED"), 6018 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6019 ) 6020 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6021 elif ( 6022 kind 6023 and self._match(TokenType.ALIAS, advance=False) 6024 and ( 6025 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6026 or (self._next and self._next.token_type == TokenType.L_PAREN) 6027 ) 6028 ): 6029 self._advance() 6030 constraints.append( 6031 self.expression( 6032 exp.ColumnConstraint, 6033 kind=exp.ComputedColumnConstraint( 6034 this=self._parse_disjunction(), 6035 persisted=self._match_texts(("STORED", "VIRTUAL")) 6036 and 
self._prev.text.upper() == "STORED", 6037 ), 6038 ) 6039 ) 6040 6041 while True: 6042 constraint = self._parse_column_constraint() 6043 if not constraint: 6044 break 6045 constraints.append(constraint) 6046 6047 if not kind and not constraints: 6048 return this 6049 6050 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6051 6052 def _parse_auto_increment( 6053 self, 6054 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6055 start = None 6056 increment = None 6057 order = None 6058 6059 if self._match(TokenType.L_PAREN, advance=False): 6060 args = self._parse_wrapped_csv(self._parse_bitwise) 6061 start = seq_get(args, 0) 6062 increment = seq_get(args, 1) 6063 elif self._match_text_seq("START"): 6064 start = self._parse_bitwise() 6065 self._match_text_seq("INCREMENT") 6066 increment = self._parse_bitwise() 6067 if self._match_text_seq("ORDER"): 6068 order = True 6069 elif self._match_text_seq("NOORDER"): 6070 order = False 6071 6072 if start and increment: 6073 return exp.GeneratedAsIdentityColumnConstraint( 6074 start=start, increment=increment, this=False, order=order 6075 ) 6076 6077 return exp.AutoIncrementColumnConstraint() 6078 6079 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6080 if not self._match_text_seq("REFRESH"): 6081 self._retreat(self._index - 1) 6082 return None 6083 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6084 6085 def _parse_compress(self) -> exp.CompressColumnConstraint: 6086 if self._match(TokenType.L_PAREN, advance=False): 6087 return self.expression( 6088 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6089 ) 6090 6091 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6092 6093 def _parse_generated_as_identity( 6094 self, 6095 ) -> ( 6096 exp.GeneratedAsIdentityColumnConstraint 6097 | exp.ComputedColumnConstraint 6098 | 
def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """Parses an inline-length constraint; the `LENGTH` keyword before the value is optional."""
    self._match_text_seq("LENGTH")
    length = self._parse_bitwise()
    return self.expression(exp.InlineLengthColumnConstraint, this=length)
self.expression(exp.NotNullColumnConstraint) 6153 if self._match_text_seq("CASESPECIFIC"): 6154 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6155 if self._match_text_seq("FOR", "REPLICATION"): 6156 return self.expression(exp.NotForReplicationColumnConstraint) 6157 6158 # Unconsume the `NOT` token 6159 self._retreat(self._index - 1) 6160 return None 6161 6162 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6163 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6164 6165 procedure_option_follows = ( 6166 self._match(TokenType.WITH, advance=False) 6167 and self._next 6168 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6169 ) 6170 6171 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6172 return self.expression( 6173 exp.ColumnConstraint, 6174 this=this, 6175 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6176 ) 6177 6178 return this 6179 6180 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6181 if not self._match(TokenType.CONSTRAINT): 6182 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6183 6184 return self.expression( 6185 exp.Constraint, 6186 this=self._parse_id_var(), 6187 expressions=self._parse_unnamed_constraints(), 6188 ) 6189 6190 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6191 constraints = [] 6192 while True: 6193 constraint = self._parse_unnamed_constraint() or self._parse_function() 6194 if not constraint: 6195 break 6196 constraints.append(constraint) 6197 6198 return constraints 6199 6200 def _parse_unnamed_constraint( 6201 self, constraints: t.Optional[t.Collection[str]] = None 6202 ) -> t.Optional[exp.Expression]: 6203 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6204 constraints or self.CONSTRAINT_PARSERS 6205 ): 6206 return None 6207 6208 constraint = self._prev.text.upper() 6209 if constraint not in self.CONSTRAINT_PARSERS: 6210 
self.raise_error(f"No parser found for schema constraint {constraint}.") 6211 6212 return self.CONSTRAINT_PARSERS[constraint](self) 6213 6214 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6215 return self._parse_id_var(any_token=False) 6216 6217 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6218 self._match_texts(("KEY", "INDEX")) 6219 return self.expression( 6220 exp.UniqueColumnConstraint, 6221 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6222 this=self._parse_schema(self._parse_unique_key()), 6223 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6224 on_conflict=self._parse_on_conflict(), 6225 options=self._parse_key_constraint_options(), 6226 ) 6227 6228 def _parse_key_constraint_options(self) -> t.List[str]: 6229 options = [] 6230 while True: 6231 if not self._curr: 6232 break 6233 6234 if self._match(TokenType.ON): 6235 action = None 6236 on = self._advance_any() and self._prev.text 6237 6238 if self._match_text_seq("NO", "ACTION"): 6239 action = "NO ACTION" 6240 elif self._match_text_seq("CASCADE"): 6241 action = "CASCADE" 6242 elif self._match_text_seq("RESTRICT"): 6243 action = "RESTRICT" 6244 elif self._match_pair(TokenType.SET, TokenType.NULL): 6245 action = "SET NULL" 6246 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6247 action = "SET DEFAULT" 6248 else: 6249 self.raise_error("Invalid key constraint") 6250 6251 options.append(f"ON {on} {action}") 6252 else: 6253 var = self._parse_var_from_options( 6254 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6255 ) 6256 if not var: 6257 break 6258 options.append(var.name) 6259 6260 return options 6261 6262 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6263 if match and not self._match(TokenType.REFERENCES): 6264 return None 6265 6266 expressions = None 6267 this = self._parse_table(schema=True) 6268 options = self._parse_key_constraint_options() 6269 return self.expression(exp.Reference, 
def _parse_foreign_key(self) -> exp.ForeignKey:
    """
    Parses a foreign key: optional wrapped column list, the reference, and `ON` actions.

    Each `ON DELETE ...` / `ON UPDATE ...` clause is stored as a keyword argument named
    after the lower-cased event ("delete" / "update") whose value is the action text.
    Key constraint options that follow are stored under `options`.
    """
    # The column list is only present when REFERENCES does not come first
    if self._match(TokenType.REFERENCES, advance=False):
        columns = None
    else:
        columns = self._parse_wrapped_id_vars()

    target = self._parse_references()
    actions = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        event = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            actions[event] = "NO ACTION"
            continue

        if self._match(TokenType.SET):
            self._match_set((TokenType.NULL, TokenType.DEFAULT))
            actions[event] = "SET " + self._prev.text.upper()
            continue

        # Any other single token is taken verbatim as the action
        self._advance()
        actions[event] = self._prev.text.upper()

    trailing_options = self._parse_key_constraint_options()

    return self.expression(
        exp.ForeignKey,
        expressions=columns,
        reference=target,
        options=trailing_options,
        **actions,  # type: ignore
    )
def _parse_odbc_datetime_literal(self) -> exp.Expression:
    """
    Parses the inside of an ODBC datetime escape such as `{d'yyyy-mm-dd'}`.

    The marker variable (looked up lower-cased in `ODBC_DATETIME_LITERALS`) selects the
    expression class, and the string that follows becomes its `this` argument. An error
    is reported if the closing brace is missing.

    Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
    """
    self._match(TokenType.VAR)
    marker = self._prev.text.lower()
    literal_class = self.ODBC_DATETIME_LITERALS[marker]

    literal = self.expression(exp_class=literal_class, this=self._parse_string())

    closed = self._match(TokenType.R_BRACE)
    if not closed:
        self.raise_error("Expected }")

    return literal
def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wraps `this` in an `exp.Slice` when a colon follows; otherwise returns it unchanged."""
    if not self._match(TokenType.COLON):
        return this

    upper_bound = self._parse_assignment()
    return self.expression(exp.Slice, this=this, expression=upper_bound)
def _parse_if(self) -> t.Optional[exp.Expression]:
    """
    Parses `IF` in either its function form or its keyword form.

    Function form: `IF(<args>)`, where each argument may carry an explicit alias.
    Keyword form: `IF <cond> [THEN] <true> [ELSE <false>] [END]`.

    Returns:
        The parsed expression; a command when `NO_PAREN_IF_COMMANDS` is set and the
        `IF` token is the first token; or None (after retreating to the `IF` token)
        when no condition can be parsed.
    """
    if self._match(TokenType.L_PAREN):

        def parse_argument() -> t.Optional[exp.Expression]:
            return self._parse_alias(self._parse_assignment(), explicit=True)

        arguments = self._parse_csv(parse_argument)
        result = self.validate_expression(exp.If.from_arg_list(arguments), arguments)
        self._match_r_paren()
        return result

    # Index of the IF token itself, which has already been consumed
    if_index = self._index - 1

    if self.NO_PAREN_IF_COMMANDS and if_index == 0:
        return self._parse_as_command(self._prev)

    condition = self._parse_assignment()
    if not condition:
        self._retreat(if_index)
        return None

    self._match(TokenType.THEN)
    consequent = self._parse_assignment()

    alternative = None
    if self._match(TokenType.ELSE):
        alternative = self._parse_assignment()

    self._match(TokenType.END)
    return self.expression(exp.If, this=condition, true=consequent, false=alternative)
def _parse_gap_fill(self) -> exp.GapFill:
    """
    Parses the arguments of a gap-fill call: `[TABLE] <table> [,] <arg>, ...`.

    The table comes first in the argument list, followed by the comma-separated
    arguments parsed with `_parse_lambda`; the result is validated before returning.
    """
    self._match(TokenType.TABLE)
    arguments = [self._parse_table()]

    self._match(TokenType.COMMA)
    arguments.extend(self._parse_csv(self._parse_lambda))

    return self.validate_expression(exp.GapFill.from_arg_list(arguments), arguments)
def _parse_string_agg(self) -> exp.GroupConcat:
    """
    Parses the arguments of a STRING_AGG / LISTAGG style call into `exp.GroupConcat`.

    Handled shapes:
      - postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
      - bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
      - trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
      - a trailing WITHIN GROUP (ORDER BY ...) clause
    """
    if self._match(TokenType.DISTINCT):
        arguments: t.List[t.Optional[exp.Expression]] = [
            self.expression(exp.Distinct, expressions=[self._parse_assignment()])
        ]
        if self._match(TokenType.COMMA):
            arguments.extend(self._parse_csv(self._parse_assignment))
    else:
        arguments = self._parse_csv(self._parse_assignment)  # type: ignore

    overflow: t.Optional[exp.Expression] = None
    if self._match_text_seq("ON", "OVERFLOW"):
        if self._match_text_seq("ERROR"):
            overflow = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            overflow = self.expression(
                exp.OverflowTruncateBehavior,
                this=self._parse_string(),
                with_count=(
                    self._match_text_seq("WITH", "COUNT")
                    or not self._match_text_seq("WITHOUT", "COUNT")
                ),
            )

    checkpoint = self._index
    closed = self._match(TokenType.R_PAREN)

    if not closed and arguments:
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        ordered = self._parse_limit(this=self._parse_order(this=arguments[0]))
        arguments[0] = ordered
        return self.expression(exp.GroupConcat, this=ordered, separator=seq_get(arguments, 1))

    # WITHIN GROUP is handled here rather than by _parse_window (which would build an
    # exp.WithinGroup node), so that the call is parsed like in MySQL / SQLite and can
    # thus be transpiled more easily to them.
    if self._match_text_seq("WITHIN", "GROUP"):
        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(arguments, 0)),
            separator=seq_get(arguments, 1),
            on_overflow=overflow,
        )

    self._retreat(checkpoint)
    return self.validate_expression(exp.GroupConcat.from_arg_list(arguments), arguments)
self._parse_csv(self._parse_column) 6657 6658 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6659 6660 if self._match_text_seq("COLUMNS"): 6661 columns = self._parse_csv(self._parse_field_def) 6662 6663 return self.expression( 6664 exp.XMLTable, 6665 this=this, 6666 namespaces=namespaces, 6667 passing=passing, 6668 columns=columns, 6669 by_ref=by_ref, 6670 ) 6671 6672 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6673 namespaces = [] 6674 6675 while True: 6676 if self._match(TokenType.DEFAULT): 6677 uri = self._parse_string() 6678 else: 6679 uri = self._parse_alias(self._parse_string()) 6680 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6681 if not self._match(TokenType.COMMA): 6682 break 6683 6684 return namespaces 6685 6686 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6687 args = self._parse_csv(self._parse_assignment) 6688 6689 if len(args) < 3: 6690 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6691 6692 return self.expression(exp.DecodeCase, expressions=args) 6693 6694 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6695 self._match_text_seq("KEY") 6696 key = self._parse_column() 6697 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6698 self._match_text_seq("VALUE") 6699 value = self._parse_bitwise() 6700 6701 if not key and not value: 6702 return None 6703 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6704 6705 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6706 if not this or not self._match_text_seq("FORMAT", "JSON"): 6707 return this 6708 6709 return self.expression(exp.FormatJson, this=this) 6710 6711 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6712 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6713 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6714 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6715 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6716 else: 6717 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6718 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6719 6720 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6721 6722 if not empty and not error and not null: 6723 return None 6724 6725 return self.expression( 6726 exp.OnCondition, 6727 empty=empty, 6728 error=error, 6729 null=null, 6730 ) 6731 6732 def _parse_on_handling( 6733 self, on: str, *values: str 6734 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6735 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6736 for value in values: 6737 if self._match_text_seq(value, "ON", on): 6738 return f"{value} ON {on}" 6739 6740 index = self._index 6741 if self._match(TokenType.DEFAULT): 6742 default_value = self._parse_bitwise() 6743 if self._match_text_seq("ON", on): 6744 return default_value 6745 6746 self._retreat(index) 6747 6748 return None 6749 6750 @t.overload 6751 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6752 6753 @t.overload 6754 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
# Note: this is currently incomplete; it only implements the "JSON_value_column" part
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """
    Parses one column definition of a JSON schema.

    A definition starting with `NESTED` has no name or type and carries a nested schema
    instead; otherwise a name and a type (identifiers are not accepted as types) are
    parsed. In both cases an optional `PATH <string>` may follow.
    """
    if self._match_text_seq("NESTED"):
        column_name = None
        column_type = None
        nested = True
    else:
        column_name = self._parse_id_var()
        column_type = self._parse_types(allow_identifiers=False)
        nested = None

    json_path = self._match_text_seq("PATH") and self._parse_string()
    schema = self._parse_json_schema() if nested else nested

    return self.expression(
        exp.JSONColumnDef,
        this=column_name,
        kind=column_type,
        path=json_path,
        nested_schema=schema,
    )
def _parse_match_against(self) -> exp.MatchAgainst:
    """Parse `MATCH (<columns>) AGAINST (<string> [<search modifier>])`.

    The column list is parsed first, then the `) AGAINST (` separator is
    consumed if present, followed by the search string and an optional modifier.
    """
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    modifier = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        # Natural language mode may additionally carry WITH QUERY EXPANSION
        expanded = self._match_text_seq("WITH", "QUERY", "EXPANSION")
        modifier = "IN NATURAL LANGUAGE MODE"
        if expanded:
            modifier = modifier + " WITH QUERY EXPANSION"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """Parse the arguments of a string-position function into `exp.StrPosition`.

    Two forms are handled: `<needle> IN <haystack>` and a comma-separated
    argument list whose optional third argument is the start position.

    Args:
        haystack_first: For the comma-separated form, whether the first
            argument is the string being searched (otherwise it is the second).
    """
    operands = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        haystack = self._parse_bitwise()
        return self.expression(exp.StrPosition, this=haystack, substr=seq_get(operands, 0))

    haystack_at, needle_at = (0, 1) if haystack_first else (1, 0)

    return self.expression(
        exp.StrPosition,
        this=seq_get(operands, haystack_at),
        substr=seq_get(operands, needle_at),
        position=seq_get(operands, 2),
    )
def _parse_window(
    self, this: t.Optional[exp.Expression], alias: bool = False
) -> t.Optional[exp.Expression]:
    """Parse the window-related suffixes that may follow `this`.

    Handles, in order: `WITHIN GROUP (...)`, `FILTER (...)`, a trailing
    `IGNORE | RESPECT NULLS`, and finally the window itself (either a bare
    window name or a parenthesized specification).

    Args:
        this: The expression the window applies to (for a named window
            definition, the window's name).
        alias: True when parsing a named window definition, in which case an
            optional alias token replaces the keyword that introduces the window.

    Returns:
        `this` (possibly wrapped) when no window follows, otherwise an
        `exp.Window` node.
    """
    func = this
    comments = func.comments if isinstance(func, exp.Expression) else None

    # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
    if self._match_text_seq("WITHIN", "GROUP"):
        ordering = self._parse_wrapped(self._parse_order)
        this = self.expression(exp.WithinGroup, this=this, expression=ordering)

    if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
        self._match(TokenType.WHERE)
        predicate = self._parse_where(skip_where_token=True)
        this = self.expression(exp.Filter, this=this, expression=predicate)
        self._match_r_paren()

    # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER;
    # some dialects implement it and some do not.
    # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
    #
    # FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... is handled in _parse_lambda,
    # whereas the code below handles
    # FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
    #
    # Oracle allows both formats
    # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
    # and Snowflake chose to do the same for familiarity
    # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
    if isinstance(this, exp.AggFunc):
        nulls_node = this.find(exp.IgnoreNulls, exp.RespectNulls)

        if nulls_node and nulls_node is not this:
            # Hoist the nested IGNORE/RESPECT NULLS so that it wraps the aggregate
            nulls_node.replace(nulls_node.this)
            this = self.expression(nulls_node.__class__, this=this)

    this = self._parse_respect_or_ignore_nulls(this)

    # bigquery select from window x AS (partition by ...)
    over = None
    if alias:
        self._match(TokenType.ALIAS)
    elif self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
        over = self._prev.text.upper()
    else:
        return this

    # The comments are re-attached to the window node, so drop them from the function
    if comments and isinstance(func, exp.Expression):
        func.pop_comments()

    if not self._match(TokenType.L_PAREN):
        # No parenthesized specification: what follows is a window name
        named = self._parse_id_var(False)
        return self.expression(
            exp.Window,
            comments=comments,
            this=this,
            alias=named,
            over=over,
        )

    window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

    first = self._match(TokenType.FIRST)
    if self._match_text_seq("LAST"):
        first = False

    partition, order = self._parse_partition_and_order()

    spec = None
    kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text
    if kind:
        self._match(TokenType.BETWEEN)
        lower = self._parse_window_spec()
        self._match(TokenType.AND)
        upper = self._parse_window_spec()

        exclude = None
        if self._match_text_seq("EXCLUDE"):
            exclude = self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)

        spec = self.expression(
            exp.WindowSpec,
            kind=kind,
            start=lower["value"],
            start_side=lower["side"],
            end=upper["value"],
            end_side=upper["side"],
            exclude=exclude,
        )

    self._match_r_paren()

    window = self.expression(
        exp.Window,
        comments=comments,
        this=this,
        partition_by=partition,
        order=order,
        spec=spec,
        alias=window_alias,
        over=over,
        first=first,
    )

    # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
    if not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
        return window

    return self._parse_window(window, alias=alias)
def _parse_id_var(
    self,
    any_token: bool = True,
    tokens: t.Optional[t.Collection[TokenType]] = None,
) -> t.Optional[exp.Expression]:
    """Parse an identifier, falling back to turning another token into one.

    Args:
        any_token: When true, any token accepted by `_advance_any` may be
            consumed and used as the identifier.
        tokens: Token types accepted as a fallback; defaults to
            `self.ID_VAR_TOKENS` when not given.

    Returns:
        The result of `_parse_identifier` when it is truthy, otherwise an
        identifier built from the consumed fallback token, otherwise the
        (falsy) result of `_parse_identifier`.
    """
    identifier = self._parse_identifier()
    if identifier:
        return identifier

    consumed = (any_token and self._advance_any()) or self._match_set(
        tokens or self.ID_VAR_TOKENS
    )
    if not consumed:
        return identifier

    # A string token used as an identifier is marked as quoted
    was_string = self._prev.token_type == TokenType.STRING
    return self._identifier_expression(quoted=was_string)
def _parse_placeholder(self) -> t.Optional[exp.Expression]:
    """Parse a placeholder using the parser registered for the current token type.

    Returns:
        The placeholder expression, or None when the current token has no
        entry in `self.PLACEHOLDER_PARSERS` or its parser produced nothing.
    """
    if not self._match_set(self.PLACEHOLDER_PARSERS):
        return None

    build = self.PLACEHOLDER_PARSERS[self._prev.token_type]
    node = build(self)
    if node:
        return node

    # The registered parser produced nothing, so step back over the matched token
    self._advance(-1)
    return None
def _parse_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
) -> t.List[exp.Expression]:
    """Parse a `sep`-separated sequence of items.

    Args:
        parse_method: Zero-argument callable that parses one item; a `None`
            result is skipped rather than collected.
        sep: Token type that separates consecutive items.

    Returns:
        The non-None items in the order they were parsed.
    """
    items = []

    while True:
        parsed = parse_method()
        if parsed is not None:
            items.append(parsed)

        if not self._match(sep):
            break

        # Hand the item that precedes the separator to _add_comments
        self._add_comments(parsed)

    return items
def _parse_transaction(self) -> exp.Transaction | exp.Command:
    """Parse the remainder of a transaction-start statement.

    Reads an optional transaction kind, an optional `TRANSACTION` / `WORK`
    keyword and a comma-separated list of modes, where each mode is a run of
    consecutive VAR tokens joined by single spaces.
    """
    kind = self._prev.text if self._match_texts(self.TRANSACTION_KIND) else None

    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    more = True
    while more:
        words = []
        while self._match(TokenType.VAR):
            words.append(self._prev.text)

        if words:
            modes.append(" ".join(words))

        # Keep going only while a comma introduces another mode
        more = self._match(TokenType.COMMA)

    return self.expression(exp.Transaction, this=kind, modes=modes)
def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    """Parse a `DROP ...` alteration.

    Returns:
        The result of `_parse_drop` when a DROP token is matched, otherwise the
        falsy match result. For a non-Command result, the "kind" arg is set to
        "COLUMN" when the node has no "kind" entry at all.
    """
    dropped = self._match(TokenType.DROP) and self._parse_drop()
    if not dropped or isinstance(dropped, exp.Command):
        return dropped

    kind = dropped.args.get("kind", "COLUMN")
    dropped.set("kind", kind)
    return dropped
def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
    """Parse the action that follows `ALTER` inside an ALTER statement.

    A keyword registered in `self.ALTER_ALTER_PARSERS` is dispatched to its
    parser. Otherwise the action is treated as `ALTER [COLUMN] <column> ...`
    and an `exp.AlterColumn` node is returned.
    """
    if self._match_texts(self.ALTER_ALTER_PARSERS):
        handler = self.ALTER_ALTER_PARSERS[self._prev.text.upper()]
        return handler(self)

    # Many dialects support the ALTER [COLUMN] syntax, so if there is no
    # keyword after ALTER we default to parsing this statement
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        changes = {"drop": True}
    elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
        changes = {"default": self._parse_assignment()}
    elif self._match(TokenType.COMMENT):
        changes = {"comment": self._parse_string()}
    elif self._match_text_seq("DROP", "NOT", "NULL"):
        changes = {"drop": True, "allow_null": True}
    elif self._match_text_seq("SET", "NOT", "NULL"):
        changes = {"allow_null": False}
    elif self._match_text_seq("SET", "VISIBLE"):
        changes = {"visible": "VISIBLE"}
    elif self._match_text_seq("SET", "INVISIBLE"):
        changes = {"visible": "INVISIBLE"}
    else:
        # Fallback: [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        dtype = self._parse_types()
        collate = self._match(TokenType.COLLATE) and self._parse_term()
        using = self._match(TokenType.USING) and self._parse_assignment()
        changes = {"dtype": dtype, "collate": collate, "using": using}

    return self.expression(exp.AlterColumn, this=column, **changes)
def _parse_alter(self) -> exp.Alter | exp.Command:
    """Parse an ALTER statement.

    Returns:
        An `exp.Alter` node when the altered object kind is in
        `self.ALTERABLES`, the action keyword has an entry in
        `self.ALTER_PARSERS`, that parser yields at least one action and no
        tokens remain afterwards. In every other case the statement is parsed
        as a command starting from the token preceding this call.
    """
    start = self._prev

    alter_token = self._match_set(self.ALTERABLES) and self._prev
    if not alter_token:
        return self._parse_as_command(start)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")
    this = self._parse_table(schema=True)

    cluster = None
    if self._match(TokenType.ON):
        cluster = self._parse_on_property()

    # Step onto the action keyword so that it becomes self._prev
    if self._next:
        self._advance()

    action_parser = None
    if self._prev:
        action_parser = self.ALTER_PARSERS.get(self._prev.text.upper())

    if not action_parser:
        return self._parse_as_command(start)

    actions = ensure_list(action_parser(self))
    not_valid = self._match_text_seq("NOT", "VALID")
    options = self._parse_csv(self._parse_property)

    # Leftover tokens or an empty action list mean the statement was not fully understood
    if self._curr or not actions:
        return self._parse_as_command(start)

    return self.expression(
        exp.Alter,
        this=this,
        kind=alter_token.text.upper(),
        exists=exists,
        actions=actions,
        only=only,
        options=options,
        cluster=cluster,
        not_valid=not_valid,
    )
# https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
    """Parse `<kind> [DELTA] STATISTICS [NOSCAN | FOR ... | SAMPLE ...]`.

    `kind` is the upper-cased text of the token consumed just before this
    method runs. A missing `STATISTICS` keyword is reported through
    `self.raise_error`.

    Returns:
        An `exp.AnalyzeStatistics` node where `this` is one of `"NOSCAN"`,
        `"FOR ALL COLUMNS"`, `"FOR COLUMNS"` or None, and `expressions` holds
        either the column references of `FOR COLUMNS` or a single
        `exp.AnalyzeSample` for the `SAMPLE` form.
    """
    this = None
    kind = self._prev.text.upper()
    option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
    expressions = []

    if not self._match_text_seq("STATISTICS"):
        self.raise_error("Expecting token STATISTICS")

    if self._match_text_seq("NOSCAN"):
        this = "NOSCAN"
    elif self._match(TokenType.FOR):
        if self._match_text_seq("ALL", "COLUMNS"):
            this = "FOR ALL COLUMNS"
        # `elif`, so that a token following FOR ALL COLUMNS can never overwrite `this`.
        # An exact keyword match is used instead of `_match_texts("COLUMNS")`: every
        # other `_match_texts` call passes a collection of texts, and a bare string
        # would presumably turn the membership test into a substring test.
        elif self._match_text_seq("COLUMNS"):
            this = "FOR COLUMNS"
            expressions = self._parse_csv(self._parse_column_reference)
    elif self._match_text_seq("SAMPLE"):
        sample = self._parse_number()
        expressions = [
            self.expression(
                exp.AnalyzeSample,
                sample=sample,
                kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
            )
        ]

    return self.expression(
        exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
    )
expression=self._parse_into()) 7736 return None 7737 7738 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7739 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7740 this = self._prev.text.upper() 7741 expression: t.Optional[exp.Expression] = None 7742 expressions = [] 7743 update_options = None 7744 7745 if self._match_text_seq("HISTOGRAM", "ON"): 7746 expressions = self._parse_csv(self._parse_column_reference) 7747 with_expressions = [] 7748 while self._match(TokenType.WITH): 7749 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7750 if self._match_texts(("SYNC", "ASYNC")): 7751 if self._match_text_seq("MODE", advance=False): 7752 with_expressions.append(f"{self._prev.text.upper()} MODE") 7753 self._advance() 7754 else: 7755 buckets = self._parse_number() 7756 if self._match_text_seq("BUCKETS"): 7757 with_expressions.append(f"{buckets} BUCKETS") 7758 if with_expressions: 7759 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7760 7761 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7762 TokenType.UPDATE, advance=False 7763 ): 7764 update_options = self._prev.text.upper() 7765 self._advance() 7766 elif self._match_text_seq("USING", "DATA"): 7767 expression = self.expression(exp.UsingData, this=self._parse_string()) 7768 7769 return self.expression( 7770 exp.AnalyzeHistogram, 7771 this=this, 7772 expressions=expressions, 7773 expression=expression, 7774 update_options=update_options, 7775 ) 7776 7777 def _parse_merge(self) -> exp.Merge: 7778 self._match(TokenType.INTO) 7779 target = self._parse_table() 7780 7781 if target and self._match(TokenType.ALIAS, advance=False): 7782 target.set("alias", self._parse_table_alias()) 7783 7784 self._match(TokenType.USING) 7785 using = self._parse_table() 7786 7787 self._match(TokenType.ON) 7788 on = self._parse_assignment() 7789 7790 return self.expression( 7791 exp.Merge, 7792 this=target, 7793 using=using, 7794 on=on, 7795 
whens=self._parse_when_matched(), 7796 returning=self._parse_returning(), 7797 ) 7798 7799 def _parse_when_matched(self) -> exp.Whens: 7800 whens = [] 7801 7802 while self._match(TokenType.WHEN): 7803 matched = not self._match(TokenType.NOT) 7804 self._match_text_seq("MATCHED") 7805 source = ( 7806 False 7807 if self._match_text_seq("BY", "TARGET") 7808 else self._match_text_seq("BY", "SOURCE") 7809 ) 7810 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7811 7812 self._match(TokenType.THEN) 7813 7814 if self._match(TokenType.INSERT): 7815 this = self._parse_star() 7816 if this: 7817 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7818 else: 7819 then = self.expression( 7820 exp.Insert, 7821 this=exp.var("ROW") 7822 if self._match_text_seq("ROW") 7823 else self._parse_value(values=False), 7824 expression=self._match_text_seq("VALUES") and self._parse_value(), 7825 ) 7826 elif self._match(TokenType.UPDATE): 7827 expressions = self._parse_star() 7828 if expressions: 7829 then = self.expression(exp.Update, expressions=expressions) 7830 else: 7831 then = self.expression( 7832 exp.Update, 7833 expressions=self._match(TokenType.SET) 7834 and self._parse_csv(self._parse_equality), 7835 ) 7836 elif self._match(TokenType.DELETE): 7837 then = self.expression(exp.Var, this=self._prev.text) 7838 else: 7839 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7840 7841 whens.append( 7842 self.expression( 7843 exp.When, 7844 matched=matched, 7845 source=source, 7846 condition=condition, 7847 then=then, 7848 ) 7849 ) 7850 return self.expression(exp.Whens, expressions=whens) 7851 7852 def _parse_show(self) -> t.Optional[exp.Expression]: 7853 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7854 if parser: 7855 return parser(self) 7856 return self._parse_as_command(self._prev) 7857 7858 def _parse_set_item_assignment( 7859 self, kind: t.Optional[str] = None 7860 ) -> t.Optional[exp.Expression]: 7861 index = 
self._index 7862 7863 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7864 return self._parse_set_transaction(global_=kind == "GLOBAL") 7865 7866 left = self._parse_primary() or self._parse_column() 7867 assignment_delimiter = self._match_texts(("=", "TO")) 7868 7869 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7870 self._retreat(index) 7871 return None 7872 7873 right = self._parse_statement() or self._parse_id_var() 7874 if isinstance(right, (exp.Column, exp.Identifier)): 7875 right = exp.var(right.name) 7876 7877 this = self.expression(exp.EQ, this=left, expression=right) 7878 return self.expression(exp.SetItem, this=this, kind=kind) 7879 7880 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7881 self._match_text_seq("TRANSACTION") 7882 characteristics = self._parse_csv( 7883 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7884 ) 7885 return self.expression( 7886 exp.SetItem, 7887 expressions=characteristics, 7888 kind="TRANSACTION", 7889 **{"global": global_}, # type: ignore 7890 ) 7891 7892 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7893 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7894 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7895 7896 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7897 index = self._index 7898 set_ = self.expression( 7899 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7900 ) 7901 7902 if self._curr: 7903 self._retreat(index) 7904 return self._parse_as_command(self._prev) 7905 7906 return set_ 7907 7908 def _parse_var_from_options( 7909 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7910 ) -> t.Optional[exp.Var]: 7911 start = self._curr 7912 if not start: 7913 return None 7914 7915 option = start.text.upper() 7916 continuations = options.get(option) 7917 7918 index = self._index 
7919 self._advance() 7920 for keywords in continuations or []: 7921 if isinstance(keywords, str): 7922 keywords = (keywords,) 7923 7924 if self._match_text_seq(*keywords): 7925 option = f"{option} {' '.join(keywords)}" 7926 break 7927 else: 7928 if continuations or continuations is None: 7929 if raise_unmatched: 7930 self.raise_error(f"Unknown option {option}") 7931 7932 self._retreat(index) 7933 return None 7934 7935 return exp.var(option) 7936 7937 def _parse_as_command(self, start: Token) -> exp.Command: 7938 while self._curr: 7939 self._advance() 7940 text = self._find_sql(start, self._prev) 7941 size = len(start.text) 7942 self._warn_unsupported() 7943 return exp.Command(this=text[:size], expression=text[size:]) 7944 7945 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7946 settings = [] 7947 7948 self._match_l_paren() 7949 kind = self._parse_id_var() 7950 7951 if self._match(TokenType.L_PAREN): 7952 while True: 7953 key = self._parse_id_var() 7954 value = self._parse_primary() 7955 if not key and value is None: 7956 break 7957 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7958 self._match(TokenType.R_PAREN) 7959 7960 self._match_r_paren() 7961 7962 return self.expression( 7963 exp.DictProperty, 7964 this=this, 7965 kind=kind.this if kind else None, 7966 settings=settings, 7967 ) 7968 7969 def _parse_dict_range(self, this: str) -> exp.DictRange: 7970 self._match_l_paren() 7971 has_min = self._match_text_seq("MIN") 7972 if has_min: 7973 min = self._parse_var() or self._parse_primary() 7974 self._match_text_seq("MAX") 7975 max = self._parse_var() or self._parse_primary() 7976 else: 7977 max = self._parse_var() or self._parse_primary() 7978 min = exp.Literal.number(0) 7979 self._match_r_paren() 7980 return self.expression(exp.DictRange, this=this, min=min, max=max) 7981 7982 def _parse_comprehension( 7983 self, this: t.Optional[exp.Expression] 7984 ) -> t.Optional[exp.Comprehension]: 7985 index = self._index 7986 
expression = self._parse_column() 7987 if not self._match(TokenType.IN): 7988 self._retreat(index - 1) 7989 return None 7990 iterator = self._parse_column() 7991 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7992 return self.expression( 7993 exp.Comprehension, 7994 this=this, 7995 expression=expression, 7996 iterator=iterator, 7997 condition=condition, 7998 ) 7999 8000 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8001 if self._match(TokenType.HEREDOC_STRING): 8002 return self.expression(exp.Heredoc, this=self._prev.text) 8003 8004 if not self._match_text_seq("$"): 8005 return None 8006 8007 tags = ["$"] 8008 tag_text = None 8009 8010 if self._is_connected(): 8011 self._advance() 8012 tags.append(self._prev.text.upper()) 8013 else: 8014 self.raise_error("No closing $ found") 8015 8016 if tags[-1] != "$": 8017 if self._is_connected() and self._match_text_seq("$"): 8018 tag_text = tags[-1] 8019 tags.append("$") 8020 else: 8021 self.raise_error("No closing $ found") 8022 8023 heredoc_start = self._curr 8024 8025 while self._curr: 8026 if self._match_text_seq(*tags, advance=False): 8027 this = self._find_sql(heredoc_start, self._prev) 8028 self._advance(len(tags)) 8029 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8030 8031 self._advance() 8032 8033 self.raise_error(f"No closing {''.join(tags)} found") 8034 return None 8035 8036 def _find_parser( 8037 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8038 ) -> t.Optional[t.Callable]: 8039 if not self._curr: 8040 return None 8041 8042 index = self._index 8043 this = [] 8044 while True: 8045 # The current token might be multiple words 8046 curr = self._curr.text.upper() 8047 key = curr.split(" ") 8048 this.append(curr) 8049 8050 self._advance() 8051 result, trie = in_trie(trie, key) 8052 if result == TrieResult.FAILED: 8053 break 8054 8055 if result == TrieResult.EXISTS: 8056 subparser = parsers[" ".join(this)] 8057 return subparser 8058 8059 self._retreat(index) 
8060 return None 8061 8062 def _match(self, token_type, advance=True, expression=None): 8063 if not self._curr: 8064 return None 8065 8066 if self._curr.token_type == token_type: 8067 if advance: 8068 self._advance() 8069 self._add_comments(expression) 8070 return True 8071 8072 return None 8073 8074 def _match_set(self, types, advance=True): 8075 if not self._curr: 8076 return None 8077 8078 if self._curr.token_type in types: 8079 if advance: 8080 self._advance() 8081 return True 8082 8083 return None 8084 8085 def _match_pair(self, token_type_a, token_type_b, advance=True): 8086 if not self._curr or not self._next: 8087 return None 8088 8089 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8090 if advance: 8091 self._advance(2) 8092 return True 8093 8094 return None 8095 8096 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8097 if not self._match(TokenType.L_PAREN, expression=expression): 8098 self.raise_error("Expecting (") 8099 8100 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8101 if not self._match(TokenType.R_PAREN, expression=expression): 8102 self.raise_error("Expecting )") 8103 8104 def _match_texts(self, texts, advance=True): 8105 if ( 8106 self._curr 8107 and self._curr.token_type != TokenType.STRING 8108 and self._curr.text.upper() in texts 8109 ): 8110 if advance: 8111 self._advance() 8112 return True 8113 return None 8114 8115 def _match_text_seq(self, *texts, advance=True): 8116 index = self._index 8117 for text in texts: 8118 if ( 8119 self._curr 8120 and self._curr.token_type != TokenType.STRING 8121 and self._curr.text.upper() == text 8122 ): 8123 self._advance() 8124 else: 8125 self._retreat(index) 8126 return None 8127 8128 if not advance: 8129 self._retreat(index) 8130 8131 return True 8132 8133 def _replace_lambda( 8134 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8135 ) -> t.Optional[exp.Expression]: 8136 if 
not node: 8137 return node 8138 8139 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8140 8141 for column in node.find_all(exp.Column): 8142 typ = lambda_types.get(column.parts[0].name) 8143 if typ is not None: 8144 dot_or_id = column.to_dot() if column.table else column.this 8145 8146 if typ: 8147 dot_or_id = self.expression( 8148 exp.Cast, 8149 this=dot_or_id, 8150 to=typ, 8151 ) 8152 8153 parent = column.parent 8154 8155 while isinstance(parent, exp.Dot): 8156 if not isinstance(parent.parent, exp.Dot): 8157 parent.replace(dot_or_id) 8158 break 8159 parent = parent.parent 8160 else: 8161 if column is node: 8162 node = dot_or_id 8163 else: 8164 column.replace(dot_or_id) 8165 return node 8166 8167 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8168 start = self._prev 8169 8170 # Not to be confused with TRUNCATE(number, decimals) function call 8171 if self._match(TokenType.L_PAREN): 8172 self._retreat(self._index - 2) 8173 return self._parse_function() 8174 8175 # Clickhouse supports TRUNCATE DATABASE as well 8176 is_database = self._match(TokenType.DATABASE) 8177 8178 self._match(TokenType.TABLE) 8179 8180 exists = self._parse_exists(not_=False) 8181 8182 expressions = self._parse_csv( 8183 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8184 ) 8185 8186 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8187 8188 if self._match_text_seq("RESTART", "IDENTITY"): 8189 identity = "RESTART" 8190 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8191 identity = "CONTINUE" 8192 else: 8193 identity = None 8194 8195 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8196 option = self._prev.text 8197 else: 8198 option = None 8199 8200 partition = self._parse_partition() 8201 8202 # Fallback case 8203 if self._curr: 8204 return self._parse_as_command(start) 8205 8206 return self.expression( 8207 exp.TruncateTable, 8208 expressions=expressions, 8209 
is_database=is_database, 8210 exists=exists, 8211 cluster=cluster, 8212 identity=identity, 8213 option=option, 8214 partition=partition, 8215 ) 8216 8217 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8218 this = self._parse_ordered(self._parse_opclass) 8219 8220 if not self._match(TokenType.WITH): 8221 return this 8222 8223 op = self._parse_var(any_token=True) 8224 8225 return self.expression(exp.WithOperator, this=this, op=op) 8226 8227 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8228 self._match(TokenType.EQ) 8229 self._match(TokenType.L_PAREN) 8230 8231 opts: t.List[t.Optional[exp.Expression]] = [] 8232 option: exp.Expression | None 8233 while self._curr and not self._match(TokenType.R_PAREN): 8234 if self._match_text_seq("FORMAT_NAME", "="): 8235 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8236 option = self._parse_format_name() 8237 else: 8238 option = self._parse_property() 8239 8240 if option is None: 8241 self.raise_error("Unable to parse option") 8242 break 8243 8244 opts.append(option) 8245 8246 return opts 8247 8248 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8249 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8250 8251 options = [] 8252 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8253 option = self._parse_var(any_token=True) 8254 prev = self._prev.text.upper() 8255 8256 # Different dialects might separate options and values by white space, "=" and "AS" 8257 self._match(TokenType.EQ) 8258 self._match(TokenType.ALIAS) 8259 8260 param = self.expression(exp.CopyParameter, this=option) 8261 8262 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8263 TokenType.L_PAREN, advance=False 8264 ): 8265 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8266 param.set("expressions", self._parse_wrapped_options()) 8267 elif prev == "FILE_FORMAT": 8268 # T-SQL's external file format case 8269 
param.set("expression", self._parse_field()) 8270 else: 8271 param.set("expression", self._parse_unquoted_field()) 8272 8273 options.append(param) 8274 self._match(sep) 8275 8276 return options 8277 8278 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8279 expr = self.expression(exp.Credentials) 8280 8281 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8282 expr.set("storage", self._parse_field()) 8283 if self._match_text_seq("CREDENTIALS"): 8284 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8285 creds = ( 8286 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8287 ) 8288 expr.set("credentials", creds) 8289 if self._match_text_seq("ENCRYPTION"): 8290 expr.set("encryption", self._parse_wrapped_options()) 8291 if self._match_text_seq("IAM_ROLE"): 8292 expr.set("iam_role", self._parse_field()) 8293 if self._match_text_seq("REGION"): 8294 expr.set("region", self._parse_field()) 8295 8296 return expr 8297 8298 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8299 return self._parse_field() 8300 8301 def _parse_copy(self) -> exp.Copy | exp.Command: 8302 start = self._prev 8303 8304 self._match(TokenType.INTO) 8305 8306 this = ( 8307 self._parse_select(nested=True, parse_subquery_alias=False) 8308 if self._match(TokenType.L_PAREN, advance=False) 8309 else self._parse_table(schema=True) 8310 ) 8311 8312 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8313 8314 files = self._parse_csv(self._parse_file_location) 8315 credentials = self._parse_credentials() 8316 8317 self._match_text_seq("WITH") 8318 8319 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8320 8321 # Fallback case 8322 if self._curr: 8323 return self._parse_as_command(start) 8324 8325 return self.expression( 8326 exp.Copy, 8327 this=this, 8328 kind=kind, 8329 credentials=credentials, 8330 files=files, 8331 params=params, 8332 ) 8333 8334 def _parse_normalize(self) -> 
exp.Normalize: 8335 return self.expression( 8336 exp.Normalize, 8337 this=self._parse_bitwise(), 8338 form=self._match(TokenType.COMMA) and self._parse_var(), 8339 ) 8340 8341 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8342 args = self._parse_csv(lambda: self._parse_lambda()) 8343 8344 this = seq_get(args, 0) 8345 decimals = seq_get(args, 1) 8346 8347 return expr_type( 8348 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8349 ) 8350 8351 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8352 star_token = self._prev 8353 8354 if self._match_text_seq("COLUMNS", "(", advance=False): 8355 this = self._parse_function() 8356 if isinstance(this, exp.Columns): 8357 this.set("unpack", True) 8358 return this 8359 8360 return self.expression( 8361 exp.Star, 8362 **{ # type: ignore 8363 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8364 "replace": self._parse_star_op("REPLACE"), 8365 "rename": self._parse_star_op("RENAME"), 8366 }, 8367 ).update_positions(star_token) 8368 8369 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8370 privilege_parts = [] 8371 8372 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8373 # (end of privilege list) or L_PAREN (start of column list) are met 8374 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8375 privilege_parts.append(self._curr.text.upper()) 8376 self._advance() 8377 8378 this = exp.var(" ".join(privilege_parts)) 8379 expressions = ( 8380 self._parse_wrapped_csv(self._parse_column) 8381 if self._match(TokenType.L_PAREN, advance=False) 8382 else None 8383 ) 8384 8385 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8386 8387 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8388 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8389 principal = self._parse_id_var() 8390 8391 if not principal: 8392 
return None 8393 8394 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8395 8396 def _parse_grant(self) -> exp.Grant | exp.Command: 8397 start = self._prev 8398 8399 privileges = self._parse_csv(self._parse_grant_privilege) 8400 8401 self._match(TokenType.ON) 8402 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8403 8404 # Attempt to parse the securable e.g. MySQL allows names 8405 # such as "foo.*", "*.*" which are not easily parseable yet 8406 securable = self._try_parse(self._parse_table_parts) 8407 8408 if not securable or not self._match_text_seq("TO"): 8409 return self._parse_as_command(start) 8410 8411 principals = self._parse_csv(self._parse_grant_principal) 8412 8413 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8414 8415 if self._curr: 8416 return self._parse_as_command(start) 8417 8418 return self.expression( 8419 exp.Grant, 8420 privileges=privileges, 8421 kind=kind, 8422 securable=securable, 8423 principals=principals, 8424 grant_option=grant_option, 8425 ) 8426 8427 def _parse_overlay(self) -> exp.Overlay: 8428 return self.expression( 8429 exp.Overlay, 8430 **{ # type: ignore 8431 "this": self._parse_bitwise(), 8432 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8433 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8434 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8435 }, 8436 ) 8437 8438 def _parse_format_name(self) -> exp.Property: 8439 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8440 # for FILE_FORMAT = <format_name> 8441 return self.expression( 8442 exp.Property, 8443 this=exp.var("FORMAT_NAME"), 8444 value=self._parse_string() or self._parse_table_parts(), 8445 ) 8446 8447 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8448 args: t.List[exp.Expression] = [] 8449 8450 if self._match(TokenType.DISTINCT): 8451 args.append(self.expression(exp.Distinct, 
expressions=[self._parse_assignment()])) 8452 self._match(TokenType.COMMA) 8453 8454 args.extend(self._parse_csv(self._parse_assignment)) 8455 8456 return self.expression( 8457 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8458 ) 8459 8460 def _identifier_expression( 8461 self, token: t.Optional[Token] = None, **kwargs: t.Any 8462 ) -> exp.Identifier: 8463 token = token or self._prev 8464 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8465 expression.update_positions(token) 8466 return expression 8467 8468 def _build_pipe_cte( 8469 self, 8470 query: exp.Query, 8471 expressions: t.List[exp.Expression], 8472 alias_cte: t.Optional[exp.TableAlias] = None, 8473 ) -> exp.Select: 8474 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8475 if alias_cte: 8476 new_cte = alias_cte 8477 else: 8478 self._pipe_cte_counter += 1 8479 new_cte = f"__tmp{self._pipe_cte_counter}" 8480 8481 with_ = query.args.get("with") 8482 ctes = with_.pop() if with_ else None 8483 8484 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8485 if ctes: 8486 new_select.set("with", ctes) 8487 8488 return new_select.with_(new_cte, as_=query, copy=False) 8489 8490 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8491 select = self._parse_select(consume_pipe=False) 8492 if not select: 8493 return query 8494 8495 return self._build_pipe_cte( 8496 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8497 ) 8498 8499 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8500 limit = self._parse_limit() 8501 offset = self._parse_offset() 8502 if limit: 8503 curr_limit = query.args.get("limit", limit) 8504 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8505 query.limit(limit, copy=False) 8506 if offset: 8507 curr_offset = query.args.get("offset") 8508 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8509 
query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8510 8511 return query 8512 8513 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8514 this = self._parse_assignment() 8515 if self._match_text_seq("GROUP", "AND", advance=False): 8516 return this 8517 8518 this = self._parse_alias(this) 8519 8520 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8521 return self._parse_ordered(lambda: this) 8522 8523 return this 8524 8525 def _parse_pipe_syntax_aggregate_group_order_by( 8526 self, query: exp.Select, group_by_exists: bool = True 8527 ) -> exp.Select: 8528 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8529 aggregates_or_groups, orders = [], [] 8530 for element in expr: 8531 if isinstance(element, exp.Ordered): 8532 this = element.this 8533 if isinstance(this, exp.Alias): 8534 element.set("this", this.args["alias"]) 8535 orders.append(element) 8536 else: 8537 this = element 8538 aggregates_or_groups.append(this) 8539 8540 if group_by_exists: 8541 query.select(*aggregates_or_groups, copy=False).group_by( 8542 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8543 copy=False, 8544 ) 8545 else: 8546 query.select(*aggregates_or_groups, append=False, copy=False) 8547 8548 if orders: 8549 return query.order_by(*orders, append=False, copy=False) 8550 8551 return query 8552 8553 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8554 self._match_text_seq("AGGREGATE") 8555 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8556 8557 if self._match(TokenType.GROUP_BY) or ( 8558 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8559 ): 8560 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8561 8562 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8563 8564 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> 
t.Optional[exp.Query]: 8565 first_setop = self.parse_set_operation(this=query) 8566 if not first_setop: 8567 return None 8568 8569 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8570 expr = self._parse_paren() 8571 return expr.assert_is(exp.Subquery).unnest() if expr else None 8572 8573 first_setop.this.pop() 8574 8575 setops = [ 8576 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8577 *self._parse_csv(_parse_and_unwrap_query), 8578 ] 8579 8580 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8581 with_ = query.args.get("with") 8582 ctes = with_.pop() if with_ else None 8583 8584 if isinstance(first_setop, exp.Union): 8585 query = query.union(*setops, copy=False, **first_setop.args) 8586 elif isinstance(first_setop, exp.Except): 8587 query = query.except_(*setops, copy=False, **first_setop.args) 8588 else: 8589 query = query.intersect(*setops, copy=False, **first_setop.args) 8590 8591 query.set("with", ctes) 8592 8593 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8594 8595 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8596 join = self._parse_join() 8597 if not join: 8598 return None 8599 8600 if isinstance(query, exp.Select): 8601 return query.join(join, copy=False) 8602 8603 return query 8604 8605 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8606 pivots = self._parse_pivots() 8607 if not pivots: 8608 return query 8609 8610 from_ = query.args.get("from") 8611 if from_: 8612 from_.this.set("pivots", pivots) 8613 8614 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8615 8616 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8617 self._match_text_seq("EXTEND") 8618 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8619 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8620 8621 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8622 
sample = self._parse_table_sample() 8623 8624 with_ = query.args.get("with") 8625 if with_: 8626 with_.expressions[-1].this.set("sample", sample) 8627 else: 8628 query.set("sample", sample) 8629 8630 return query 8631 8632 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8633 if isinstance(query, exp.Subquery): 8634 query = exp.select("*").from_(query, copy=False) 8635 8636 if not query.args.get("from"): 8637 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8638 8639 while self._match(TokenType.PIPE_GT): 8640 start = self._curr 8641 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8642 if not parser: 8643 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8644 # keywords, making it tricky to disambiguate them without lookahead. The approach 8645 # here is to try and parse a set operation and if that fails, then try to parse a 8646 # join operator. If that fails as well, then the operator is not supported. 
8647 parsed_query = self._parse_pipe_syntax_set_operator(query) 8648 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8649 if not parsed_query: 8650 self._retreat(start) 8651 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8652 break 8653 query = parsed_query 8654 else: 8655 query = parser(self, query) 8656 8657 return query 8658 8659 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8660 vars = self._parse_csv(self._parse_id_var) 8661 if not vars: 8662 return None 8663 8664 return self.expression( 8665 exp.DeclareItem, 8666 this=vars, 8667 kind=self._parse_types(), 8668 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8669 ) 8670 8671 def _parse_declare(self) -> exp.Declare | exp.Command: 8672 start = self._prev 8673 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8674 8675 if not expressions or self._curr: 8676 return self._parse_as_command(start) 8677 8678 return self.expression(exp.Declare, expressions=expressions) 8679 8680 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8681 exp_class = exp.Cast if strict else exp.TryCast 8682 8683 if exp_class == exp.TryCast: 8684 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8685 8686 return self.expression(exp_class, **kwargs)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """
    Build a map expression from a flat, alternating key/value argument list.

    Args:
        args: The parsed arguments, laid out as key1, value1, key2, value2, ...
            A single star argument is handled as a special case.

    Returns:
        An `exp.StarMap` wrapping the argument when it is a lone star, otherwise an
        `exp.VarMap` whose `keys` and `values` arrays are built from the even- and
        odd-indexed arguments respectively.

    Raises:
        IndexError: If a key has no value following it (odd number of arguments).
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    key_positions = range(0, len(args), 2)
    keys = [args[pos] for pos in key_positions]
    # Index explicitly (instead of slicing) so a dangling key still raises IndexError
    values = [args[pos + 1] for pos in key_positions]

    return exp.VarMap(
        keys=exp.array(*keys, copy=False),
        values=exp.array(*values, copy=False),
    )
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """
    Create a parser callback for a binary range-style operator.

    Args:
        expr_type: The expression class to instantiate for the operator.
        reverse_args: If True, the freshly parsed operand becomes `this` and the
            operand that was passed in becomes `expression`.

    Returns:
        A function taking the parser and the already-parsed operand. It parses the other
        operand with `_parse_bitwise`, builds an `expr_type` node through the parser's
        `expression` method and hands that node to `_parse_escape`.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        parsed = self._parse_bitwise()
        left, right = (parsed, this) if reverse_args else (this, parsed)
        node = self.expression(expr_type, this=left, expression=right)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """
    Build a logarithm expression from the arguments of a LOG call.

    Args:
        args: The parsed arguments. With two arguments the default order is
            base, expression.
        dialect: The dialect, consulted for `LOG_BASE_FIRST` and for its parser
            class' `LOG_DEFAULTS_TO_LN` setting.

    Returns:
        For two arguments, an `exp.Log` whose operands are swapped when the dialect
        does not put the base first. Otherwise, an `exp.Ln` or `exp.Log` of the first
        argument, depending on `LOG_DEFAULTS_TO_LN`.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # Single-argument form: the dialect decides whether LOG means natural log
        single_arg_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return single_arg_type(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """
    Create a builder for JSON extraction expressions that take a path argument.

    Args:
        expr_type: The expression class the returned builder instantiates.

    Returns:
        A function taking the argument list and the dialect. The first argument becomes
        `this`, the second is converted with `dialect.to_json_path` and becomes
        `expression`. Any further arguments are stored under `expressions`, but only
        when `expr_type` is `exp.JSONExtract`.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        target = seq_get(args, 0)
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=target, expression=json_path)

        if len(args) <= 2 or expr_type is not exp.JSONExtract:
            return node

        node.set("expressions", args[2:])
        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """
    Build a modulo expression from the arguments of a MOD call.

    Operands that are binary nodes are wrapped in parentheses,
    e.g. MOD(a + 1, 7) -> (a + 1) % 7.

    Args:
        args: The parsed arguments; the first two are used as the operands.

    Returns:
        An `exp.Mod` node over the (possibly parenthesized) operands.
    """

    def _parenthesize(operand):
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    dividend = _parenthesize(seq_get(args, 0))
    divisor = _parenthesize(seq_get(args, 1))
    return exp.Mod(this=dividend, expression=divisor)
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """
    Build an array-like expression and record which constructor syntax produced it.

    Args:
        exp_class: The expression class to instantiate with `args` as its `expressions`.
        args: The elements of the constructed array.
        bracket_kind: The token type of the opening bracket that was used.
        dialect: Supplies `HAS_DISTINCT_ARRAY_CONSTRUCTORS`.

    Returns:
        The new node. For `exp.Array` under a dialect with `HAS_DISTINCT_ARRAY_CONSTRUCTORS`
        set, `bracket_notation` is also set to whether `bracket_kind` is `TokenType.L_BRACKET`.
    """
    node = exp_class(expressions=args)

    if exp_class != exp.Array or not dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        return node

    node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)
    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """
    Build an `exp.ConvertTimezone` node from the arguments of a `CONVERT_TIMEZONE` call.

    Args:
        args: The function arguments.
        default_source_tz: Source time zone to use for the two-argument form; when it is
            falsy, `source_tz` is left as `None`.

    Returns:
        For exactly two arguments, a node with `args[0]` as `target_tz` and `args[1]` as
        `timestamp`. Any other argument count is delegated to
        `exp.ConvertTimezone.from_arg_list`.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = None
    if default_source_tz:
        source_tz = exp.Literal.string(default_source_tz)

    return exp.ConvertTimezone(
        source_tz=source_tz,
        target_tz=seq_get(args, 0),
        timestamp=seq_get(args, 1),
    )
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: 
exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 
361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 
TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 } 484 485 # Tokens that can represent identifiers 486 ID_VAR_TOKENS = { 487 TokenType.ALL, 488 TokenType.ATTACH, 489 TokenType.VAR, 490 TokenType.ANTI, 491 TokenType.APPLY, 492 TokenType.ASC, 493 TokenType.ASOF, 494 TokenType.AUTO_INCREMENT, 495 TokenType.BEGIN, 496 TokenType.BPCHAR, 497 TokenType.CACHE, 498 TokenType.CASE, 499 TokenType.COLLATE, 500 TokenType.COMMAND, 501 TokenType.COMMENT, 502 TokenType.COMMIT, 503 TokenType.CONSTRAINT, 504 TokenType.COPY, 505 TokenType.CUBE, 506 TokenType.CURRENT_SCHEMA, 507 TokenType.DEFAULT, 508 TokenType.DELETE, 509 TokenType.DESC, 510 TokenType.DESCRIBE, 511 TokenType.DETACH, 512 TokenType.DICTIONARY, 513 TokenType.DIV, 514 TokenType.END, 515 TokenType.EXECUTE, 516 TokenType.EXPORT, 517 TokenType.ESCAPE, 518 TokenType.FALSE, 519 TokenType.FIRST, 520 TokenType.FILTER, 521 TokenType.FINAL, 522 TokenType.FORMAT, 523 TokenType.FULL, 524 TokenType.GET, 525 TokenType.IDENTIFIER, 526 TokenType.IS, 527 TokenType.ISNULL, 528 TokenType.INTERVAL, 529 TokenType.KEEP, 530 TokenType.KILL, 531 TokenType.LEFT, 532 TokenType.LIMIT, 533 TokenType.LOAD, 
534 TokenType.MERGE, 535 TokenType.NATURAL, 536 TokenType.NEXT, 537 TokenType.OFFSET, 538 TokenType.OPERATOR, 539 TokenType.ORDINALITY, 540 TokenType.OVERLAPS, 541 TokenType.OVERWRITE, 542 TokenType.PARTITION, 543 TokenType.PERCENT, 544 TokenType.PIVOT, 545 TokenType.PRAGMA, 546 TokenType.PUT, 547 TokenType.RANGE, 548 TokenType.RECURSIVE, 549 TokenType.REFERENCES, 550 TokenType.REFRESH, 551 TokenType.RENAME, 552 TokenType.REPLACE, 553 TokenType.RIGHT, 554 TokenType.ROLLUP, 555 TokenType.ROW, 556 TokenType.ROWS, 557 TokenType.SEMI, 558 TokenType.SET, 559 TokenType.SETTINGS, 560 TokenType.SHOW, 561 TokenType.TEMPORARY, 562 TokenType.TOP, 563 TokenType.TRUE, 564 TokenType.TRUNCATE, 565 TokenType.UNIQUE, 566 TokenType.UNNEST, 567 TokenType.UNPIVOT, 568 TokenType.UPDATE, 569 TokenType.USE, 570 TokenType.VOLATILE, 571 TokenType.WINDOW, 572 *CREATABLES, 573 *SUBQUERY_PREDICATES, 574 *TYPE_TOKENS, 575 *NO_PAREN_FUNCTIONS, 576 } 577 ID_VAR_TOKENS.remove(TokenType.UNION) 578 579 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 580 TokenType.ANTI, 581 TokenType.APPLY, 582 TokenType.ASOF, 583 TokenType.FULL, 584 TokenType.LEFT, 585 TokenType.LOCK, 586 TokenType.NATURAL, 587 TokenType.RIGHT, 588 TokenType.SEMI, 589 TokenType.WINDOW, 590 } 591 592 ALIAS_TOKENS = ID_VAR_TOKENS 593 594 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 595 596 ARRAY_CONSTRUCTORS = { 597 "ARRAY": exp.Array, 598 "LIST": exp.List, 599 } 600 601 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 602 603 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 604 605 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 606 607 FUNC_TOKENS = { 608 TokenType.COLLATE, 609 TokenType.COMMAND, 610 TokenType.CURRENT_DATE, 611 TokenType.CURRENT_DATETIME, 612 TokenType.CURRENT_SCHEMA, 613 TokenType.CURRENT_TIMESTAMP, 614 TokenType.CURRENT_TIME, 615 TokenType.CURRENT_USER, 616 TokenType.FILTER, 617 TokenType.FIRST, 618 TokenType.FORMAT, 619 TokenType.GET, 620 TokenType.GLOB, 621 TokenType.IDENTIFIER, 622 TokenType.INDEX, 
623 TokenType.ISNULL, 624 TokenType.ILIKE, 625 TokenType.INSERT, 626 TokenType.LIKE, 627 TokenType.MERGE, 628 TokenType.NEXT, 629 TokenType.OFFSET, 630 TokenType.PRIMARY_KEY, 631 TokenType.RANGE, 632 TokenType.REPLACE, 633 TokenType.RLIKE, 634 TokenType.ROW, 635 TokenType.UNNEST, 636 TokenType.VAR, 637 TokenType.LEFT, 638 TokenType.RIGHT, 639 TokenType.SEQUENCE, 640 TokenType.DATE, 641 TokenType.DATETIME, 642 TokenType.TABLE, 643 TokenType.TIMESTAMP, 644 TokenType.TIMESTAMPTZ, 645 TokenType.TRUNCATE, 646 TokenType.WINDOW, 647 TokenType.XOR, 648 *TYPE_TOKENS, 649 *SUBQUERY_PREDICATES, 650 } 651 652 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 653 TokenType.AND: exp.And, 654 } 655 656 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 657 TokenType.COLON_EQ: exp.PropertyEQ, 658 } 659 660 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 661 TokenType.OR: exp.Or, 662 } 663 664 EQUALITY = { 665 TokenType.EQ: exp.EQ, 666 TokenType.NEQ: exp.NEQ, 667 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 668 } 669 670 COMPARISON = { 671 TokenType.GT: exp.GT, 672 TokenType.GTE: exp.GTE, 673 TokenType.LT: exp.LT, 674 TokenType.LTE: exp.LTE, 675 } 676 677 BITWISE = { 678 TokenType.AMP: exp.BitwiseAnd, 679 TokenType.CARET: exp.BitwiseXor, 680 TokenType.PIPE: exp.BitwiseOr, 681 } 682 683 TERM = { 684 TokenType.DASH: exp.Sub, 685 TokenType.PLUS: exp.Add, 686 TokenType.MOD: exp.Mod, 687 TokenType.COLLATE: exp.Collate, 688 } 689 690 FACTOR = { 691 TokenType.DIV: exp.IntDiv, 692 TokenType.LR_ARROW: exp.Distance, 693 TokenType.SLASH: exp.Div, 694 TokenType.STAR: exp.Mul, 695 } 696 697 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 698 699 TIMES = { 700 TokenType.TIME, 701 TokenType.TIMETZ, 702 } 703 704 TIMESTAMPS = { 705 TokenType.TIMESTAMP, 706 TokenType.TIMESTAMPNTZ, 707 TokenType.TIMESTAMPTZ, 708 TokenType.TIMESTAMPLTZ, 709 *TIMES, 710 } 711 712 SET_OPERATIONS = { 713 TokenType.UNION, 714 TokenType.INTERSECT, 715 TokenType.EXCEPT, 716 } 717 718 
JOIN_METHODS = { 719 TokenType.ASOF, 720 TokenType.NATURAL, 721 TokenType.POSITIONAL, 722 } 723 724 JOIN_SIDES = { 725 TokenType.LEFT, 726 TokenType.RIGHT, 727 TokenType.FULL, 728 } 729 730 JOIN_KINDS = { 731 TokenType.ANTI, 732 TokenType.CROSS, 733 TokenType.INNER, 734 TokenType.OUTER, 735 TokenType.SEMI, 736 TokenType.STRAIGHT_JOIN, 737 } 738 739 JOIN_HINTS: t.Set[str] = set() 740 741 LAMBDAS = { 742 TokenType.ARROW: lambda self, expressions: self.expression( 743 exp.Lambda, 744 this=self._replace_lambda( 745 self._parse_assignment(), 746 expressions, 747 ), 748 expressions=expressions, 749 ), 750 TokenType.FARROW: lambda self, expressions: self.expression( 751 exp.Kwarg, 752 this=exp.var(expressions[0].name), 753 expression=self._parse_assignment(), 754 ), 755 } 756 757 COLUMN_OPERATORS = { 758 TokenType.DOT: None, 759 TokenType.DOTCOLON: lambda self, this, to: self.expression( 760 exp.JSONCast, 761 this=this, 762 to=to, 763 ), 764 TokenType.DCOLON: lambda self, this, to: self.build_cast( 765 strict=self.STRICT_CAST, this=this, to=to 766 ), 767 TokenType.ARROW: lambda self, this, path: self.expression( 768 exp.JSONExtract, 769 this=this, 770 expression=self.dialect.to_json_path(path), 771 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 772 ), 773 TokenType.DARROW: lambda self, this, path: self.expression( 774 exp.JSONExtractScalar, 775 this=this, 776 expression=self.dialect.to_json_path(path), 777 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 778 ), 779 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtract, 781 this=this, 782 expression=path, 783 ), 784 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtractScalar, 786 this=this, 787 expression=path, 788 ), 789 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 790 exp.JSONBContains, 791 this=this, 792 expression=key, 793 ), 794 } 795 796 CAST_COLUMN_OPERATORS = { 797 TokenType.DOTCOLON, 798 TokenType.DCOLON, 799 } 800 801 
EXPRESSION_PARSERS = { 802 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 803 exp.Column: lambda self: self._parse_column(), 804 exp.Condition: lambda self: self._parse_assignment(), 805 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 806 exp.Expression: lambda self: self._parse_expression(), 807 exp.From: lambda self: self._parse_from(joins=True), 808 exp.Group: lambda self: self._parse_group(), 809 exp.Having: lambda self: self._parse_having(), 810 exp.Hint: lambda self: self._parse_hint_body(), 811 exp.Identifier: lambda self: self._parse_id_var(), 812 exp.Join: lambda self: self._parse_join(), 813 exp.Lambda: lambda self: self._parse_lambda(), 814 exp.Lateral: lambda self: self._parse_lateral(), 815 exp.Limit: lambda self: self._parse_limit(), 816 exp.Offset: lambda self: self._parse_offset(), 817 exp.Order: lambda self: self._parse_order(), 818 exp.Ordered: lambda self: self._parse_ordered(), 819 exp.Properties: lambda self: self._parse_properties(), 820 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 821 exp.Qualify: lambda self: self._parse_qualify(), 822 exp.Returning: lambda self: self._parse_returning(), 823 exp.Select: lambda self: self._parse_select(), 824 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 825 exp.Table: lambda self: self._parse_table_parts(), 826 exp.TableAlias: lambda self: self._parse_table_alias(), 827 exp.Tuple: lambda self: self._parse_value(values=False), 828 exp.Whens: lambda self: self._parse_when_matched(), 829 exp.Where: lambda self: self._parse_where(), 830 exp.Window: lambda self: self._parse_named_window(), 831 exp.With: lambda self: self._parse_with(), 832 "JOIN_TYPE": lambda self: self._parse_join_parts(), 833 } 834 835 STATEMENT_PARSERS = { 836 TokenType.ALTER: lambda self: self._parse_alter(), 837 TokenType.ANALYZE: lambda self: self._parse_analyze(), 838 TokenType.BEGIN: lambda self: self._parse_transaction(), 
839 TokenType.CACHE: lambda self: self._parse_cache(), 840 TokenType.COMMENT: lambda self: self._parse_comment(), 841 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 842 TokenType.COPY: lambda self: self._parse_copy(), 843 TokenType.CREATE: lambda self: self._parse_create(), 844 TokenType.DELETE: lambda self: self._parse_delete(), 845 TokenType.DESC: lambda self: self._parse_describe(), 846 TokenType.DESCRIBE: lambda self: self._parse_describe(), 847 TokenType.DROP: lambda self: self._parse_drop(), 848 TokenType.GRANT: lambda self: self._parse_grant(), 849 TokenType.INSERT: lambda self: self._parse_insert(), 850 TokenType.KILL: lambda self: self._parse_kill(), 851 TokenType.LOAD: lambda self: self._parse_load(), 852 TokenType.MERGE: lambda self: self._parse_merge(), 853 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 854 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 855 TokenType.REFRESH: lambda self: self._parse_refresh(), 856 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 857 TokenType.SET: lambda self: self._parse_set(), 858 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 859 TokenType.UNCACHE: lambda self: self._parse_uncache(), 860 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 861 TokenType.UPDATE: lambda self: self._parse_update(), 862 TokenType.USE: lambda self: self._parse_use(), 863 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 864 } 865 866 UNARY_PARSERS = { 867 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 868 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 869 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 870 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 871 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 872 
TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 873 } 874 875 STRING_PARSERS = { 876 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 877 exp.RawString, this=token.text 878 ), 879 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 880 exp.National, this=token.text 881 ), 882 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 883 TokenType.STRING: lambda self, token: self.expression( 884 exp.Literal, this=token.text, is_string=True 885 ), 886 TokenType.UNICODE_STRING: lambda self, token: self.expression( 887 exp.UnicodeString, 888 this=token.text, 889 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 890 ), 891 } 892 893 NUMERIC_PARSERS = { 894 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 895 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 896 TokenType.HEX_STRING: lambda self, token: self.expression( 897 exp.HexString, 898 this=token.text, 899 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 900 ), 901 TokenType.NUMBER: lambda self, token: self.expression( 902 exp.Literal, this=token.text, is_string=False 903 ), 904 } 905 906 PRIMARY_PARSERS = { 907 **STRING_PARSERS, 908 **NUMERIC_PARSERS, 909 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 910 TokenType.NULL: lambda self, _: self.expression(exp.Null), 911 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 912 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 913 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 914 TokenType.STAR: lambda self, _: self._parse_star_ops(), 915 } 916 917 PLACEHOLDER_PARSERS = { 918 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 919 TokenType.PARAMETER: lambda self: self._parse_parameter(), 920 TokenType.COLON: lambda self: ( 921 
self.expression(exp.Placeholder, this=self._prev.text) 922 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 923 else None 924 ), 925 } 926 927 RANGE_PARSERS = { 928 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 929 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 930 TokenType.GLOB: binary_range_parser(exp.Glob), 931 TokenType.ILIKE: binary_range_parser(exp.ILike), 932 TokenType.IN: lambda self, this: self._parse_in(this), 933 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 934 TokenType.IS: lambda self, this: self._parse_is(this), 935 TokenType.LIKE: binary_range_parser(exp.Like), 936 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 937 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 938 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 939 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 940 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 941 } 942 943 PIPE_SYNTAX_TRANSFORM_PARSERS = { 944 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 945 "AS": lambda self, query: self._build_pipe_cte( 946 query, [exp.Star()], self._parse_table_alias() 947 ), 948 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 949 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 950 "ORDER BY": lambda self, query: query.order_by( 951 self._parse_order(), append=False, copy=False 952 ), 953 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 954 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 955 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 956 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 957 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 958 } 959 960 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 961 "ALLOWED_VALUES": lambda self: self.expression( 962 exp.AllowedValuesProperty, 
expressions=self._parse_csv(self._parse_primary) 963 ), 964 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 965 "AUTO": lambda self: self._parse_auto_property(), 966 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 967 "BACKUP": lambda self: self.expression( 968 exp.BackupProperty, this=self._parse_var(any_token=True) 969 ), 970 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 971 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 972 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 973 "CHECKSUM": lambda self: self._parse_checksum(), 974 "CLUSTER BY": lambda self: self._parse_cluster(), 975 "CLUSTERED": lambda self: self._parse_clustered_by(), 976 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 977 exp.CollateProperty, **kwargs 978 ), 979 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 980 "CONTAINS": lambda self: self._parse_contains_property(), 981 "COPY": lambda self: self._parse_copy_property(), 982 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 983 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 984 "DEFINER": lambda self: self._parse_definer(), 985 "DETERMINISTIC": lambda self: self.expression( 986 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 987 ), 988 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 989 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 990 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 991 "DISTKEY": lambda self: self._parse_distkey(), 992 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 993 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 994 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 995 "ENVIRONMENT": lambda self: self.expression( 996 
exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 997 ), 998 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 999 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1000 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1001 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1002 "FREESPACE": lambda self: self._parse_freespace(), 1003 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1004 "HEAP": lambda self: self.expression(exp.HeapProperty), 1005 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1006 "IMMUTABLE": lambda self: self.expression( 1007 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1008 ), 1009 "INHERITS": lambda self: self.expression( 1010 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1011 ), 1012 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1013 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1014 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1015 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1016 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1017 "LIKE": lambda self: self._parse_create_like(), 1018 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1019 "LOCK": lambda self: self._parse_locking(), 1020 "LOCKING": lambda self: self._parse_locking(), 1021 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1022 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1023 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1024 "MODIFIES": lambda self: self._parse_modifies_property(), 1025 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1026 "NO": lambda self: self._parse_no_property(), 1027 "ON": lambda self: 
self._parse_on_property(), 1028 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1029 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1030 "PARTITION": lambda self: self._parse_partitioned_of(), 1031 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1032 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1033 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1034 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1035 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1036 "READS": lambda self: self._parse_reads_property(), 1037 "REMOTE": lambda self: self._parse_remote_with_connection(), 1038 "RETURNS": lambda self: self._parse_returns(), 1039 "STRICT": lambda self: self.expression(exp.StrictProperty), 1040 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1041 "ROW": lambda self: self._parse_row(), 1042 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1043 "SAMPLE": lambda self: self.expression( 1044 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1045 ), 1046 "SECURE": lambda self: self.expression(exp.SecureProperty), 1047 "SECURITY": lambda self: self._parse_security(), 1048 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1049 "SETTINGS": lambda self: self._parse_settings_property(), 1050 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1051 "SORTKEY": lambda self: self._parse_sortkey(), 1052 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1053 "STABLE": lambda self: self.expression( 1054 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1055 ), 1056 "STORED": lambda self: self._parse_stored(), 1057 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1058 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1059 "TEMP": lambda self: 
self.expression(exp.TemporaryProperty), 1060 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1061 "TO": lambda self: self._parse_to_table(), 1062 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1063 "TRANSFORM": lambda self: self.expression( 1064 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1065 ), 1066 "TTL": lambda self: self._parse_ttl(), 1067 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1068 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1069 "VOLATILE": lambda self: self._parse_volatile_property(), 1070 "WITH": lambda self: self._parse_with_property(), 1071 } 1072 1073 CONSTRAINT_PARSERS = { 1074 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1075 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1076 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1077 "CHARACTER SET": lambda self: self.expression( 1078 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1079 ), 1080 "CHECK": lambda self: self.expression( 1081 exp.CheckColumnConstraint, 1082 this=self._parse_wrapped(self._parse_assignment), 1083 enforced=self._match_text_seq("ENFORCED"), 1084 ), 1085 "COLLATE": lambda self: self.expression( 1086 exp.CollateColumnConstraint, 1087 this=self._parse_identifier() or self._parse_column(), 1088 ), 1089 "COMMENT": lambda self: self.expression( 1090 exp.CommentColumnConstraint, this=self._parse_string() 1091 ), 1092 "COMPRESS": lambda self: self._parse_compress(), 1093 "CLUSTERED": lambda self: self.expression( 1094 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1095 ), 1096 "NONCLUSTERED": lambda self: self.expression( 1097 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1098 ), 1099 "DEFAULT": lambda self: self.expression( 1100 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1101 
),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # If an UPDATE token follows, build an OnUpdateColumnConstraint from the next function;
        # otherwise (or if that yields a falsy value) build an OnProperty from the next id/var
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        # `this` / `expression` keep the falsy `_match` result when the FOR / ALIAS token is absent
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        """
        Parses a `BUCKET(...)` or `TRUNCATE(...)` partition transform.

        The keyword itself has already been consumed (it is read back from `self._prev`).

        Returns:
            An `exp.PartitionedByBucket` or `exp.PartitionByTruncate` node, or `None` (after
            stepping back one token) when the keyword is not followed by a left parenthesis.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        # Only the first two parsed arguments are used; missing ones become None via seq_get
        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    # Keyword -> callable (taking the parser instance) for the action that follows it
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Every name listed here is also a key of CONSTRAINT_PARSERS
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Function name -> callable (taking the parser instance) with dedicated parsing for that function
    FUNCTION_PARSERS = {
        # One entry per SQL name of ArgMax / ArgMin, all routed to `_parse_max_min_by`
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Token type -> callable returning a `(name, parsed value)` pair for that query modifier.
    # Several token types share the same name (e.g. LIMIT and FETCH both yield "limit").
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        # CONNECT_BY and START_WITH both yield "connect"; they differ only in `skip_start_token`
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    # The set of token types that have an entry in QUERY_MODIFIER_PARSERS
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    # Scope keyword -> callable that parses a SET item, passing the keyword through where relevant
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty here; NOTE(review): presumably populated by dialect-specific parsers - confirm
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Unlike the other tables, these callables take three arguments; the third one is ignored
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
1315 "ISOLATION": ( 1316 ("LEVEL", "REPEATABLE", "READ"), 1317 ("LEVEL", "READ", "COMMITTED"), 1318 ("LEVEL", "READ", "UNCOMITTED"), 1319 ("LEVEL", "SERIALIZABLE"), 1320 ), 1321 "READ": ("WRITE", "ONLY"), 1322 } 1323 1324 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1325 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1326 ) 1327 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1328 1329 CREATE_SEQUENCE: OPTIONS_TYPE = { 1330 "SCALE": ("EXTEND", "NOEXTEND"), 1331 "SHARD": ("EXTEND", "NOEXTEND"), 1332 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1333 **dict.fromkeys( 1334 ( 1335 "SESSION", 1336 "GLOBAL", 1337 "KEEP", 1338 "NOKEEP", 1339 "ORDER", 1340 "NOORDER", 1341 "NOCACHE", 1342 "CYCLE", 1343 "NOCYCLE", 1344 "NOMINVALUE", 1345 "NOMAXVALUE", 1346 "NOSCALE", 1347 "NOSHARD", 1348 ), 1349 tuple(), 1350 ), 1351 } 1352 1353 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1354 1355 USABLES: OPTIONS_TYPE = dict.fromkeys( 1356 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1357 ) 1358 1359 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1360 1361 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1362 "TYPE": ("EVOLUTION",), 1363 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1364 } 1365 1366 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1367 1368 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1369 1370 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1371 "NOT": ("ENFORCED",), 1372 "MATCH": ( 1373 "FULL", 1374 "PARTIAL", 1375 "SIMPLE", 1376 ), 1377 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1378 "USING": ( 1379 "BTREE", 1380 "HASH", 1381 ), 1382 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1383 } 1384 1385 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1386 "NO": ("OTHERS",), 1387 "CURRENT": ("ROW",), 1388 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1389 } 1390 1391 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", 
"ROLLBACK"} 1392 1393 CLONE_KEYWORDS = {"CLONE", "COPY"} 1394 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1395 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1396 1397 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1398 1399 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1400 1401 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1402 1403 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1404 1405 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1406 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1407 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1408 1409 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1410 1411 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1412 1413 ADD_CONSTRAINT_TOKENS = { 1414 TokenType.CONSTRAINT, 1415 TokenType.FOREIGN_KEY, 1416 TokenType.INDEX, 1417 TokenType.KEY, 1418 TokenType.PRIMARY_KEY, 1419 TokenType.UNIQUE, 1420 } 1421 1422 DISTINCT_TOKENS = {TokenType.DISTINCT} 1423 1424 NULL_TOKENS = {TokenType.NULL} 1425 1426 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1427 1428 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1429 1430 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1431 1432 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1433 1434 ODBC_DATETIME_LITERALS = { 1435 "d": exp.Date, 1436 "t": exp.Time, 1437 "ts": exp.Timestamp, 1438 } 1439 1440 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1441 1442 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1443 1444 # The style options for the DESCRIBE statement 1445 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1446 1447 # The style options for the ANALYZE statement 1448 ANALYZE_STYLES = { 1449 "BUFFER_USAGE_LIMIT", 1450 "FULL", 1451 "LOCAL", 
"NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Keyword -> callable (taking the parser instance); several keywords share one helper
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    # Passed as the first argument to `_parse_cast` / `_parse_convert` by the "CAST" and "CONVERT"
    # entries of FUNCTION_PARSERS
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
# In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # The instance attributes: the four constructor settings plus the per-parse state set in `reset`
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: The desired error level. Falls back to `ErrorLevel.IMMEDIATE` when falsy.
            error_message_context: The number of characters of the SQL string used to size the
                excerpts shown in error and warning messages.
            max_errors: Forwarded to `concat_messages` when the collected errors are raised
                together in `check_errors`.
            dialect: The dialect to use, resolved through `Dialect.get_or_raise`.
        """
        # NOTE(review): imported locally, presumably to avoid a circular import - confirm
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Resets the per-parse state (SQL text, errors, tokens, cursor and counters)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        # The unbound method is passed so that `_parse` can call it with the parser instance
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
"""
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                # The first target type that parses without a ParseError wins
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the target type that was being attempted
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate failed: report all collected errors, chained from the last failure
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits the tokens into semicolon-delimited chunks and runs `parse_method` on each chunk.

        Args:
            parse_method: The callable invoked with this parser once per chunk.
            raw_tokens: The list of tokens.
            sql: The original SQL string; stored on the parser (as "" when falsy).

        Returns:
            The results of `parse_method`, one per chunk, in order.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon that carries comments becomes a chunk of its own
                # (presumably so that those comments are not lost - confirm)
                if token.comments:
                    chunks.append([token])

                # Start a new chunk unless this semicolon is the very last token
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Place the cursor on the first token of the chunk
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # parse_method stopped before consuming every token of the chunk
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The description of the error.
            token: The token the error refers to. Defaults to the current token, then the
                previous one, then an empty string token.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        # +1 so that the `highlight` slice below includes position `token.end`
        end = token.end + 1
        # Up to `error_message_context` characters on each side of the highlighted span
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m ... \033[0m are the ANSI escape codes that underline the highlighted span
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Explicit comments take precedence; otherwise attach the pending `_prev_comments`, if any
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Moves the pending `_prev_comments` onto `expression`, if both are truthy, and clears them."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        # Validation is skipped entirely when errors are being ignored
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of `self.sql` from `start.start` up to and including `end.end`."""
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        """
        Tells whether the previous token ends immediately before the current token starts.

        NOTE(review): the `and` chain returns its first falsy operand (e.g. None when there is no
        previous or current token) rather than a strict bool; the result should only be used for
        its truthiness.
        """
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """
        Moves the cursor by `times` tokens and refreshes `_curr`, `_next`, `_prev` and `_prev_comments`.

        `times` may be negative; `_retreat` relies on that to move backwards.
        """
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            # On or before the first token there is no previous token
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor to the absolute position `index`; does nothing if it is already there."""
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        """
        Logs a warning that the current chunk is being parsed as a Command.

        Nothing is logged when the chunk holds at most one token. The quoted SQL is truncated
        to `error_message_context` characters.
        """
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """
        Builds an `exp.Command` from the previously consumed token.

        `this` is the upper-cased text of the previous token and `expression` is the result of
        `_parse_string()`. The unsupported-syntax warning is emitted first.
        """
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1818 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1819 solve this by setting & resetting the parser state accordingly 1820 """ 1821 index = self._index 1822 error_level = self.error_level 1823 1824 self.error_level = ErrorLevel.IMMEDIATE 1825 try: 1826 this = parse_method() 1827 except ParseError: 1828 this = None 1829 finally: 1830 if not this or retreat: 1831 self._retreat(index) 1832 self.error_level = error_level 1833 1834 return this 1835 1836 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1837 start = self._prev 1838 exists = self._parse_exists() if allow_exists else None 1839 1840 self._match(TokenType.ON) 1841 1842 materialized = self._match_text_seq("MATERIALIZED") 1843 kind = self._match_set(self.CREATABLES) and self._prev 1844 if not kind: 1845 return self._parse_as_command(start) 1846 1847 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1848 this = self._parse_user_defined_function(kind=kind.token_type) 1849 elif kind.token_type == TokenType.TABLE: 1850 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1851 elif kind.token_type == TokenType.COLUMN: 1852 this = self._parse_column() 1853 else: 1854 this = self._parse_id_var() 1855 1856 self._match(TokenType.IS) 1857 1858 return self.expression( 1859 exp.Comment, 1860 this=this, 1861 kind=kind.text, 1862 expression=self._parse_string(), 1863 exists=exists, 1864 materialized=materialized, 1865 ) 1866 1867 def _parse_to_table( 1868 self, 1869 ) -> exp.ToTableProperty: 1870 table = self._parse_table_parts(schema=True) 1871 return self.expression(exp.ToTableProperty, this=table) 1872 1873 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1874 def _parse_ttl(self) -> exp.Expression: 1875 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1876 this = self._parse_bitwise() 1877 1878 if self._match_text_seq("DELETE"): 1879 return 
self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1880 if self._match_text_seq("RECOMPRESS"): 1881 return self.expression( 1882 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1883 ) 1884 if self._match_text_seq("TO", "DISK"): 1885 return self.expression( 1886 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1887 ) 1888 if self._match_text_seq("TO", "VOLUME"): 1889 return self.expression( 1890 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1891 ) 1892 1893 return this 1894 1895 expressions = self._parse_csv(_parse_ttl_action) 1896 where = self._parse_where() 1897 group = self._parse_group() 1898 1899 aggregates = None 1900 if group and self._match(TokenType.SET): 1901 aggregates = self._parse_csv(self._parse_set_item) 1902 1903 return self.expression( 1904 exp.MergeTreeTTL, 1905 expressions=expressions, 1906 where=where, 1907 group=group, 1908 aggregates=aggregates, 1909 ) 1910 1911 def _parse_statement(self) -> t.Optional[exp.Expression]: 1912 if self._curr is None: 1913 return None 1914 1915 if self._match_set(self.STATEMENT_PARSERS): 1916 comments = self._prev_comments 1917 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1918 stmt.add_comments(comments, prepend=True) 1919 return stmt 1920 1921 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1922 return self._parse_command() 1923 1924 expression = self._parse_expression() 1925 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1926 return self._parse_query_modifiers(expression) 1927 1928 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1929 start = self._prev 1930 temporary = self._match(TokenType.TEMPORARY) 1931 materialized = self._match_text_seq("MATERIALIZED") 1932 1933 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1934 if not kind: 1935 return self._parse_as_command(start) 1936 1937 concurrently = self._match_text_seq("CONCURRENTLY") 
def _parse_create(self) -> exp.Create | exp.Command:
    """Parse a CREATE (or REPLACE) statement into an `exp.Create` node.

    The statement keyword has already been consumed by the statement
    dispatcher, so `self._prev` is that keyword token. The method then reads,
    in order: OR REPLACE / OR ALTER / OR REFRESH, UNIQUE, the
    [NON]CLUSTERED COLUMNSTORE markers, the kind of object being created,
    CONCURRENTLY, IF NOT EXISTS, and a kind-specific body (function/procedure,
    index, or a DB-level creatable such as a table/view/sequence), followed by
    an optional [SHALLOW] clone clause.

    Properties may appear at several positions; each position is parsed with
    `_parse_properties` and merged into one `exp.Properties` via `extend_props`.

    Returns:
        An `exp.Create` on success. Falls back to `_parse_as_command(start)`
        when the created kind cannot be identified or when unconsumed tokens
        remain (other than a closing paren or a comma).
    """
    # Note: this can't be None because we've matched a statement parser
    start = self._prev

    # `REPLACE ...` as the statement keyword also counts as a replace.
    replace = (
        start.token_type == TokenType.REPLACE
        or self._match_pair(TokenType.OR, TokenType.REPLACE)
        or self._match_pair(TokenType.OR, TokenType.ALTER)
    )
    refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

    unique = self._match(TokenType.UNIQUE)

    # Tri-state: True (CLUSTERED), False (NONCLUSTERED or bare COLUMNSTORE), None (absent).
    if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
        clustered = True
    elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
        "COLUMNSTORE"
    ):
        clustered = False
    else:
        clustered = None

    # For `TABLE FUNCTION`, skip TABLE so that FUNCTION becomes the creatable kind.
    if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
        self._advance()

    properties = None
    create_token = self._match_set(self.CREATABLES) and self._prev

    if not create_token:
        # exp.Properties.Location.POST_CREATE
        properties = self._parse_properties()
        create_token = self._match_set(self.CREATABLES) and self._prev

        # Neither a creatable kind nor leading properties: keep the raw text as a command.
        if not properties or not create_token:
            return self._parse_as_command(start)

    concurrently = self._match_text_seq("CONCURRENTLY")
    exists = self._parse_exists(not_=True)
    this = None
    expression: t.Optional[exp.Expression] = None
    indexes = None
    no_schema_binding = None
    begin = None
    end = None
    clone = None

    def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
        # Merge newly parsed properties into the accumulated `properties` node,
        # adopting `temp_props` itself when nothing has been collected yet.
        nonlocal properties
        if properties and temp_props:
            properties.expressions.extend(temp_props.expressions)
        elif temp_props:
            properties = temp_props

    if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=create_token.token_type)

        # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
        extend_props(self._parse_properties())

        expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
        extend_props(self._parse_properties())

        if not expression:
            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_user_defined_function_expression()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
    elif create_token.token_type == TokenType.INDEX:
        # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
        if not self._match(TokenType.ON):
            index = self._parse_id_var()
            anonymous = False
        else:
            index = None
            anonymous = True

        this = self._parse_index(index=index, anonymous=anonymous)
    elif create_token.token_type in self.DB_CREATABLES:
        table_parts = self._parse_table_parts(
            schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
        )

        # exp.Properties.Location.POST_NAME
        self._match(TokenType.COMMA)
        extend_props(self._parse_properties(before=True))

        this = self._parse_schema(this=table_parts)

        # exp.Properties.Location.POST_SCHEMA and POST_WITH
        extend_props(self._parse_properties())

        has_alias = self._match(TokenType.ALIAS)
        if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
            # exp.Properties.Location.POST_ALIAS
            extend_props(self._parse_properties())

        if create_token.token_type == TokenType.SEQUENCE:
            expression = self._parse_types()
            extend_props(self._parse_properties())
        else:
            expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fallback to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

        if create_token.token_type == TokenType.TABLE:
            # exp.Properties.Location.POST_EXPRESSION
            extend_props(self._parse_properties())

            # Collect trailing index definitions; properties may follow each one.
            indexes = []
            while True:
                index = self._parse_index()

                # exp.Properties.Location.POST_INDEX
                extend_props(self._parse_properties())
                if not index:
                    break
                else:
                    self._match(TokenType.COMMA)
                    indexes.append(index)
        elif create_token.token_type == TokenType.VIEW:
            if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                no_schema_binding = True
        elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
            extend_props(self._parse_properties())

    shallow = self._match_text_seq("SHALLOW")

    if self._match_texts(self.CLONE_KEYWORDS):
        # `copy` records whether the matched keyword was literally "copy".
        copy = self._prev.text.lower() == "copy"
        clone = self.expression(
            exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
        )

    # Leftover tokens (other than `)` or `,`) mean the statement was not fully
    # understood, so it is preserved verbatim as a command instead.
    if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
        return self._parse_as_command(start)

    create_kind_text = create_token.text.upper()
    return self.expression(
        exp.Create,
        this=this,
        kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
        replace=replace,
        refresh=refresh,
        unique=unique,
        expression=expression,
        exists=exists,
        properties=properties,
        indexes=indexes,
        no_schema_binding=no_schema_binding,
        begin=begin,
        end=end,
        clone=clone,
        concurrently=concurrently,
        clustered=clustered,
    )
def _parse_property_before(self) -> t.Optional[exp.Expression]:
    """Parse one property that may be preceded by modifier keywords.

    An optional leading comma is skipped, then the modifiers NO, DUAL, BEFORE,
    DEFAULT, [NOT] LOCAL, AFTER, MIN[IMUM] and MAX[IMUM] are probed in that
    order. The ones that matched are forwarded as keyword arguments to the
    handler found in ``PROPERTY_PARSERS``.

    Returns:
        The handler's result, or None when no property keyword follows (or when
        the handler rejects the modifiers and the reported error does not raise).
    """
    # only used for teradata currently
    self._match(TokenType.COMMA)

    # The probing order below is significant: each probe may consume tokens.
    modifiers = {}
    modifiers["no"] = self._match_text_seq("NO")
    modifiers["dual"] = self._match_text_seq("DUAL")
    modifiers["before"] = self._match_text_seq("BEFORE")
    modifiers["default"] = self._match_text_seq("DEFAULT")

    locality = self._match_text_seq("LOCAL") and "LOCAL"
    if not locality:
        locality = self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"
    modifiers["local"] = locality

    modifiers["after"] = self._match_text_seq("AFTER")
    modifiers["minimum"] = self._match_texts(("MIN", "MINIMUM"))
    modifiers["maximum"] = self._match_texts(("MAX", "MAXIMUM"))

    if not self._match_texts(self.PROPERTY_PARSERS):
        return None

    handler = self.PROPERTY_PARSERS[self._prev.text.upper()]
    matched = {name: flag for name, flag in modifiers.items() if flag}

    try:
        return handler(self, **matched)
    except TypeError:
        # The handler does not accept one of the matched modifiers.
        self.raise_error(f"Cannot parse property '{self._prev.text}'")

    return None
def _parse_property(self) -> t.Optional[exp.Expression]:
    """Parse a single property.

    Tried in order: a keyword registered in ``PROPERTY_PARSERS``; DEFAULT
    followed by such a keyword (the handler receives ``default=True``);
    COMPOUND SORTKEY; SQL SECURITY [DEFINER]; and finally a generic
    ``key = value`` pair. When no ``=`` follows the key, the parser rewinds and
    tries sequence properties instead.
    """
    if self._match_texts(self.PROPERTY_PARSERS):
        handler = self.PROPERTY_PARSERS[self._prev.text.upper()]
        return handler(self)

    if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
        handler = self.PROPERTY_PARSERS[self._prev.text.upper()]
        return handler(self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("SQL", "SECURITY"):
        is_definer = self._match_text_seq("DEFINER")
        return self.expression(exp.SqlSecurityProperty, definer=is_definer)

    checkpoint = self._index
    prop_key = self._parse_column()

    if not self._match(TokenType.EQ):
        # Not a key/value pair: rewind and try sequence options.
        self._retreat(checkpoint)
        return self._parse_sequence_properties()

    # Dotted identifiers wrapped in exp.Column become exp.Dot, a plain one becomes exp.Var
    if isinstance(prop_key, exp.Column):
        if len(prop_key.parts) > 1:
            prop_key = prop_key.to_dot()
        else:
            prop_key = exp.var(prop_key.name)

    prop_value = self._parse_bitwise()
    if not prop_value:
        prop_value = self._parse_var(any_token=True)

    # A value parsed as exp.Column(exp.Identifier()) is turned into exp.Var
    if isinstance(prop_value, exp.Column):
        prop_value = exp.var(prop_value.name)

    return self.expression(exp.Property, this=prop_key, value=prop_value)
def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """Parse consecutive properties until one fails to parse.

    Args:
        before: when truthy, each property is parsed with
            `_parse_property_before`; otherwise with `_parse_property`.

    Returns:
        An `exp.Properties` wrapping everything collected, or None when no
        property was found.
    """
    parse_one = self._parse_property_before if before else self._parse_property

    collected = []
    while True:
        parsed = parse_one()
        if not parsed:
            break
        # A single parse may yield one property or a list of them.
        collected.extend(ensure_list(parsed))

    if not collected:
        return None

    return self.expression(exp.Properties, expressions=collected)
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """Parse the value of a SYSTEM_VERSIONING property.

    Accepts an optional ``=``, then either ``OFF`` or an optional ``ON``
    followed by an optional parenthesized option list containing
    ``HISTORY_TABLE = <table>``, ``DATA_CONSISTENCY_CHECK = <token>`` and
    ``HISTORY_RETENTION_PERIOD = <period>``, separated by commas.

    Args:
        with_: stored under the node's ``with`` arg.

    Returns:
        The populated `exp.WithSystemVersioningProperty` (``on`` is False only
        when ``OFF`` was matched).
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == index:
                # Nothing was consumed (unrecognized option): stop here instead of
                # spinning forever on the same token. The leftover tokens are then
                # handled by the caller.
                break

    return prop
def _parse_distributed_property(self) -> exp.DistributedByProperty:
    """Parse ``[BY HASH (<ids>) | BY RANDOM] [BUCKETS {<n> | AUTO}] [<order>]``.

    The kind defaults to "HASH" when neither ``BY HASH`` nor ``BY RANDOM`` is matched.
    """
    distribution = "HASH"
    columns: t.Optional[t.List[exp.Expression]] = None

    if self._match_text_seq("BY", "HASH"):
        columns = self._parse_wrapped_csv(self._parse_id_var)
    elif self._match_text_seq("BY", "RANDOM"):
        distribution = "RANDOM"

    # If the BUCKETS keyword is not present, the number of buckets is AUTO
    bucket_count: t.Optional[exp.Expression] = None
    if self._match_text_seq("BUCKETS"):
        if not self._match_text_seq("AUTO"):
            bucket_count = self._parse_number()

    ordering = self._parse_order()

    return self.expression(
        exp.DistributedByProperty,
        expressions=columns,
        kind=distribution,
        buckets=bucket_count,
        order=ordering,
    )
# https://dev.mysql.com/doc/refman/8.0/en/create-view.html
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """Parse ``[=] <user>@<host>`` into an `exp.DefinerProperty`.

    The host is either an identifier or the text of a MOD token.

    Returns:
        The property with ``this`` set to ``"<user>@<host>"``, or None when
        either part is missing.
    """
    self._match(TokenType.EQ)

    account = self._parse_id_var()
    self._match(TokenType.PARAMETER)

    machine = self._parse_id_var()
    if not machine:
        machine = self._match(TokenType.MOD) and self._prev.text

    if account and machine:
        return exp.DefinerProperty(this=f"{account}@{machine}")

    return None
def _parse_clustered_by(self) -> exp.ClusteredByProperty:
    """Parse ``[BY] (<columns>) [SORTED BY (<ordered>)] [INTO] <n> [BUCKETS]``."""

    def parenthesized_csv(parse_item: t.Callable) -> t.List:
        # `(` item [, item ...] `)` using the parser's own paren matchers.
        self._match_l_paren()
        items = self._parse_csv(parse_item)
        self._match_r_paren()
        return items

    self._match_text_seq("BY")
    columns = parenthesized_csv(self._parse_column)

    sort_columns = None
    if self._match_text_seq("SORTED", "BY"):
        sort_columns = parenthesized_csv(self._parse_ordered)

    self._match(TokenType.INTO)
    bucket_count = self._parse_number()
    self._match_text_seq("BUCKETS")

    return self.expression(
        exp.ClusteredByProperty,
        expressions=columns,
        sorted_by=sort_columns,
        buckets=bucket_count,
    )
def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
    """Parse ``[NO] [CONCURRENT] ISOLATED LOADING [<target>]``.

    Returns:
        The property node, or None (after rewinding to the starting position)
        when ``ISOLATED LOADING`` does not follow the optional modifiers.
    """
    checkpoint = self._index
    negated = self._match_text_seq("NO")
    is_concurrent = self._match_text_seq("CONCURRENT")

    if self._match_text_seq("ISOLATED", "LOADING"):
        loading_target = self._parse_var_from_options(
            self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False
        )
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=negated,
            concurrent=is_concurrent,
            target=loading_target,
        )

    # Undo any NO / CONCURRENT that was consumed.
    self._retreat(checkpoint)
    return None
def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
    """Parse a partition bound specification.

    Supported forms:
      - ``IN (<exprs>)``
      - ``FROM (<bounds>) TO (<bounds>)``, where a bound may be MINVALUE or MAXVALUE
      - ``WITH (MODULUS <n>, REMAINDER <n>)``

    Any other input is reported through `raise_error`.
    """

    def bound_value() -> t.Optional[exp.Expression]:
        # MINVALUE / MAXVALUE are kept as variables; anything else is an expression.
        for keyword in ("MINVALUE", "MAXVALUE"):
            if self._match_text_seq(keyword):
                return exp.var(keyword)
        return self._parse_bitwise()

    this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
    remainder = None
    lower_bounds = None
    upper_bounds = None

    if self._match(TokenType.IN):
        this = self._parse_wrapped_csv(self._parse_bitwise)
    elif self._match(TokenType.FROM):
        lower_bounds = self._parse_wrapped_csv(bound_value)
        self._match_text_seq("TO")
        upper_bounds = self._parse_wrapped_csv(bound_value)
    elif self._match_text_seq("WITH", "(", "MODULUS"):
        this = self._parse_number()
        self._match_text_seq(",", "REMAINDER")
        remainder = self._parse_number()
        self._match_r_paren()
    else:
        self.raise_error("Failed to parse partition bound spec.")

    return self.expression(
        exp.PartitionBoundSpec,
        this=this,
        expression=remainder,
        from_expressions=lower_bounds,
        to_expressions=upper_bounds,
    )
def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
    """Parse ``<table> [{INCLUDING | EXCLUDING} <option>]...``.

    Each INCLUDING/EXCLUDING clause becomes an `exp.Property` whose ``this`` is
    the upper-cased keyword and whose value is the upper-cased option name.

    Returns:
        The `exp.LikeProperty`, or None when a keyword is not followed by an
        identifier.
    """
    source_table = self._parse_table(schema=True)

    like_options = []
    while self._match_texts(("INCLUDING", "EXCLUDING")):
        keyword = self._prev.text.upper()

        option_name = self._parse_id_var()
        if not option_name:
            return None

        option_value = exp.var(option_name.this.upper())
        like_options.append(self.expression(exp.Property, this=keyword, value=option_value))

    return self.expression(exp.LikeProperty, this=source_table, expressions=like_options)
def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
    """Parse the branches and the source of a multi-table insert.

    The previously consumed token (upper-cased) is used as the insert kind.
    Branches of the form ``[WHEN <cond> THEN] [ELSE] INTO <table> [<values>]``
    are read until one does not contain INTO; a table is then parsed as the source.

    Args:
        comments: comments to attach to the resulting node.
    """
    insert_kind = self._prev.text.upper()

    def next_branch() -> t.Optional[exp.ConditionalInsert]:
        condition = None
        if self._match(TokenType.WHEN):
            condition = self._parse_disjunction()
            self._match(TokenType.THEN)

        is_else = self._match(TokenType.ELSE)

        if not self._match(TokenType.INTO):
            return None

        target = self._parse_table(schema=True)
        values = self._parse_derived_table_values()
        insert = self.expression(exp.Insert, this=target, expression=values)

        return self.expression(
            exp.ConditionalInsert,
            this=insert,
            expression=condition,
            else_=is_else,
        )

    branches = []
    while True:
        branch = next_branch()
        if branch is None:
            break
        branches.append(branch)

    return self.expression(
        exp.MultitableInserts,
        kind=insert_kind,
        comments=comments,
        expressions=branches,
        source=self._parse_table(),
    )
def _parse_kill(self) -> exp.Kill:
    """Parse ``[CONNECTION | QUERY] <primary>`` into an `exp.Kill` node.

    The optional keyword is stored, as written, in the ``kind`` variable.
    """
    target_kind = None
    if self._match_texts(("CONNECTION", "QUERY")):
        target_kind = exp.var(self._prev.text)

    target = self._parse_primary()
    return self.expression(exp.Kill, this=target, kind=target_kind)
self._parse_csv(self._parse_id_var) 2967 self._match_r_paren() 2968 2969 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2970 if self._prev.token_type == TokenType.UPDATE: 2971 self._match(TokenType.SET) 2972 expressions = self._parse_csv(self._parse_equality) 2973 else: 2974 expressions = None 2975 2976 return self.expression( 2977 exp.OnConflict, 2978 duplicate=duplicate, 2979 expressions=expressions, 2980 action=action, 2981 conflict_keys=conflict_keys, 2982 constraint=constraint, 2983 where=self._parse_where(), 2984 ) 2985 2986 def _parse_returning(self) -> t.Optional[exp.Returning]: 2987 if not self._match(TokenType.RETURNING): 2988 return None 2989 return self.expression( 2990 exp.Returning, 2991 expressions=self._parse_csv(self._parse_expression), 2992 into=self._match(TokenType.INTO) and self._parse_table_part(), 2993 ) 2994 2995 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2996 if not self._match(TokenType.FORMAT): 2997 return None 2998 return self._parse_row_format() 2999 3000 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3001 index = self._index 3002 with_ = with_ or self._match_text_seq("WITH") 3003 3004 if not self._match(TokenType.SERDE_PROPERTIES): 3005 self._retreat(index) 3006 return None 3007 return self.expression( 3008 exp.SerdeProperties, 3009 **{ # type: ignore 3010 "expressions": self._parse_wrapped_properties(), 3011 "with": with_, 3012 }, 3013 ) 3014 3015 def _parse_row_format( 3016 self, match_row: bool = False 3017 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3018 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3019 return None 3020 3021 if self._match_text_seq("SERDE"): 3022 this = self._parse_string() 3023 3024 serde_properties = self._parse_serde_properties() 3025 3026 return self.expression( 3027 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 
3028 ) 3029 3030 self._match_text_seq("DELIMITED") 3031 3032 kwargs = {} 3033 3034 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3035 kwargs["fields"] = self._parse_string() 3036 if self._match_text_seq("ESCAPED", "BY"): 3037 kwargs["escaped"] = self._parse_string() 3038 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3039 kwargs["collection_items"] = self._parse_string() 3040 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3041 kwargs["map_keys"] = self._parse_string() 3042 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3043 kwargs["lines"] = self._parse_string() 3044 if self._match_text_seq("NULL", "DEFINED", "AS"): 3045 kwargs["null"] = self._parse_string() 3046 3047 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3048 3049 def _parse_load(self) -> exp.LoadData | exp.Command: 3050 if self._match_text_seq("DATA"): 3051 local = self._match_text_seq("LOCAL") 3052 self._match_text_seq("INPATH") 3053 inpath = self._parse_string() 3054 overwrite = self._match(TokenType.OVERWRITE) 3055 self._match_pair(TokenType.INTO, TokenType.TABLE) 3056 3057 return self.expression( 3058 exp.LoadData, 3059 this=self._parse_table(schema=True), 3060 local=local, 3061 overwrite=overwrite, 3062 inpath=inpath, 3063 partition=self._parse_partition(), 3064 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3065 serde=self._match_text_seq("SERDE") and self._parse_string(), 3066 ) 3067 return self._parse_as_command(self._prev) 3068 3069 def _parse_delete(self) -> exp.Delete: 3070 # This handles MySQL's "Multiple-Table Syntax" 3071 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3072 tables = None 3073 if not self._match(TokenType.FROM, advance=False): 3074 tables = self._parse_csv(self._parse_table) or None 3075 3076 returning = self._parse_returning() 3077 3078 return self.expression( 3079 exp.Delete, 3080 tables=tables, 3081 this=self._match(TokenType.FROM) and 
self._parse_table(joins=True), 3082 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3083 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3084 where=self._parse_where(), 3085 returning=returning or self._parse_returning(), 3086 limit=self._parse_limit(), 3087 ) 3088 3089 def _parse_update(self) -> exp.Update: 3090 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3091 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3092 returning = self._parse_returning() 3093 return self.expression( 3094 exp.Update, 3095 **{ # type: ignore 3096 "this": this, 3097 "expressions": expressions, 3098 "from": self._parse_from(joins=True), 3099 "where": self._parse_where(), 3100 "returning": returning or self._parse_returning(), 3101 "order": self._parse_order(), 3102 "limit": self._parse_limit(), 3103 }, 3104 ) 3105 3106 def _parse_use(self) -> exp.Use: 3107 return self.expression( 3108 exp.Use, 3109 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3110 this=self._parse_table(schema=False), 3111 ) 3112 3113 def _parse_uncache(self) -> exp.Uncache: 3114 if not self._match(TokenType.TABLE): 3115 self.raise_error("Expecting TABLE after UNCACHE") 3116 3117 return self.expression( 3118 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3119 ) 3120 3121 def _parse_cache(self) -> exp.Cache: 3122 lazy = self._match_text_seq("LAZY") 3123 self._match(TokenType.TABLE) 3124 table = self._parse_table(schema=True) 3125 3126 options = [] 3127 if self._match_text_seq("OPTIONS"): 3128 self._match_l_paren() 3129 k = self._parse_string() 3130 self._match(TokenType.EQ) 3131 v = self._parse_string() 3132 options = [k, v] 3133 self._match_r_paren() 3134 3135 self._match(TokenType.ALIAS) 3136 return self.expression( 3137 exp.Cache, 3138 this=table, 3139 lazy=lazy, 3140 options=options, 3141 expression=self._parse_select(nested=True), 3142 ) 3143 3144 def 
_parse_partition(self) -> t.Optional[exp.Partition]: 3145 if not self._match_texts(self.PARTITION_KEYWORDS): 3146 return None 3147 3148 return self.expression( 3149 exp.Partition, 3150 subpartition=self._prev.text.upper() == "SUBPARTITION", 3151 expressions=self._parse_wrapped_csv(self._parse_assignment), 3152 ) 3153 3154 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3155 def _parse_value_expression() -> t.Optional[exp.Expression]: 3156 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3157 return exp.var(self._prev.text.upper()) 3158 return self._parse_expression() 3159 3160 if self._match(TokenType.L_PAREN): 3161 expressions = self._parse_csv(_parse_value_expression) 3162 self._match_r_paren() 3163 return self.expression(exp.Tuple, expressions=expressions) 3164 3165 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3166 expression = self._parse_expression() 3167 if expression: 3168 return self.expression(exp.Tuple, expressions=[expression]) 3169 return None 3170 3171 def _parse_projections(self) -> t.List[exp.Expression]: 3172 return self._parse_expressions() 3173 3174 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3175 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3176 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3177 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3178 ) 3179 elif self._match(TokenType.FROM): 3180 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3181 # Support parentheses for duckdb FROM-first syntax 3182 select = self._parse_select() 3183 if select: 3184 select.set("from", from_) 3185 this = select 3186 else: 3187 this = exp.select("*").from_(t.cast(exp.From, from_)) 3188 else: 3189 this = ( 3190 self._parse_table(consume_pipe=True) 3191 if table 3192 else self._parse_select(nested=True, parse_set_operation=False) 3193 ) 3194 3195 # Transform exp.Values into a exp.Table to 
pass through parse_query_modifiers 3196 # in case a modifier (e.g. join) is following 3197 if table and isinstance(this, exp.Values) and this.alias: 3198 alias = this.args["alias"].pop() 3199 this = exp.Table(this=this, alias=alias) 3200 3201 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3202 3203 return this 3204 3205 def _parse_select( 3206 self, 3207 nested: bool = False, 3208 table: bool = False, 3209 parse_subquery_alias: bool = True, 3210 parse_set_operation: bool = True, 3211 consume_pipe: bool = True, 3212 ) -> t.Optional[exp.Expression]: 3213 query = self._parse_select_query( 3214 nested=nested, 3215 table=table, 3216 parse_subquery_alias=parse_subquery_alias, 3217 parse_set_operation=parse_set_operation, 3218 ) 3219 3220 if ( 3221 consume_pipe 3222 and self._match(TokenType.PIPE_GT, advance=False) 3223 and isinstance(query, exp.Query) 3224 ): 3225 query = self._parse_pipe_syntax_query(query) 3226 query = query.subquery(copy=False) if query and table else query 3227 3228 return query 3229 3230 def _parse_select_query( 3231 self, 3232 nested: bool = False, 3233 table: bool = False, 3234 parse_subquery_alias: bool = True, 3235 parse_set_operation: bool = True, 3236 ) -> t.Optional[exp.Expression]: 3237 cte = self._parse_with() 3238 3239 if cte: 3240 this = self._parse_statement() 3241 3242 if not this: 3243 self.raise_error("Failed to parse any statement following CTE") 3244 return cte 3245 3246 if "with" in this.arg_types: 3247 this.set("with", cte) 3248 else: 3249 self.raise_error(f"{this.key} does not support CTE") 3250 this = cte 3251 3252 return this 3253 3254 # duckdb supports leading with FROM x 3255 from_ = ( 3256 self._parse_from(consume_pipe=True) 3257 if self._match(TokenType.FROM, advance=False) 3258 else None 3259 ) 3260 3261 if self._match(TokenType.SELECT): 3262 comments = self._prev_comments 3263 3264 hint = self._parse_hint() 3265 3266 if self._next and not self._next.token_type == TokenType.DOT: 3267 all_ = 
self._match(TokenType.ALL) 3268 distinct = self._match_set(self.DISTINCT_TOKENS) 3269 else: 3270 all_, distinct = None, None 3271 3272 kind = ( 3273 self._match(TokenType.ALIAS) 3274 and self._match_texts(("STRUCT", "VALUE")) 3275 and self._prev.text.upper() 3276 ) 3277 3278 if distinct: 3279 distinct = self.expression( 3280 exp.Distinct, 3281 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3282 ) 3283 3284 if all_ and distinct: 3285 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3286 3287 operation_modifiers = [] 3288 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3289 operation_modifiers.append(exp.var(self._prev.text.upper())) 3290 3291 limit = self._parse_limit(top=True) 3292 projections = self._parse_projections() 3293 3294 this = self.expression( 3295 exp.Select, 3296 kind=kind, 3297 hint=hint, 3298 distinct=distinct, 3299 expressions=projections, 3300 limit=limit, 3301 operation_modifiers=operation_modifiers or None, 3302 ) 3303 this.comments = comments 3304 3305 into = self._parse_into() 3306 if into: 3307 this.set("into", into) 3308 3309 if not from_: 3310 from_ = self._parse_from() 3311 3312 if from_: 3313 this.set("from", from_) 3314 3315 this = self._parse_query_modifiers(this) 3316 elif (table or nested) and self._match(TokenType.L_PAREN): 3317 this = self._parse_wrapped_select(table=table) 3318 3319 # We return early here so that the UNION isn't attached to the subquery by the 3320 # following call to _parse_set_operations, but instead becomes the parent node 3321 self._match_r_paren() 3322 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3323 elif self._match(TokenType.VALUES, advance=False): 3324 this = self._parse_derived_table_values() 3325 elif from_: 3326 this = exp.select("*").from_(from_.this, copy=False) 3327 elif self._match(TokenType.SUMMARIZE): 3328 table = self._match(TokenType.TABLE) 3329 this = self._parse_select() or self._parse_string() or 
self._parse_table() 3330 return self.expression(exp.Summarize, this=this, table=table) 3331 elif self._match(TokenType.DESCRIBE): 3332 this = self._parse_describe() 3333 elif self._match_text_seq("STREAM"): 3334 this = self._parse_function() 3335 if this: 3336 this = self.expression(exp.Stream, this=this) 3337 else: 3338 self._retreat(self._index - 1) 3339 else: 3340 this = None 3341 3342 return self._parse_set_operations(this) if parse_set_operation else this 3343 3344 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3345 self._match_text_seq("SEARCH") 3346 3347 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3348 3349 if not kind: 3350 return None 3351 3352 self._match_text_seq("FIRST", "BY") 3353 3354 return self.expression( 3355 exp.RecursiveWithSearch, 3356 kind=kind, 3357 this=self._parse_id_var(), 3358 expression=self._match_text_seq("SET") and self._parse_id_var(), 3359 using=self._match_text_seq("USING") and self._parse_id_var(), 3360 ) 3361 3362 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3363 if not skip_with_token and not self._match(TokenType.WITH): 3364 return None 3365 3366 comments = self._prev_comments 3367 recursive = self._match(TokenType.RECURSIVE) 3368 3369 last_comments = None 3370 expressions = [] 3371 while True: 3372 cte = self._parse_cte() 3373 if isinstance(cte, exp.CTE): 3374 expressions.append(cte) 3375 if last_comments: 3376 cte.add_comments(last_comments) 3377 3378 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3379 break 3380 else: 3381 self._match(TokenType.WITH) 3382 3383 last_comments = self._prev_comments 3384 3385 return self.expression( 3386 exp.With, 3387 comments=comments, 3388 expressions=expressions, 3389 recursive=recursive, 3390 search=self._parse_recursive_with_search(), 3391 ) 3392 3393 def _parse_cte(self) -> t.Optional[exp.CTE]: 3394 index = self._index 3395 3396 alias = 
self._parse_table_alias(self.ID_VAR_TOKENS) 3397 if not alias or not alias.this: 3398 self.raise_error("Expected CTE to have alias") 3399 3400 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3401 self._retreat(index) 3402 return None 3403 3404 comments = self._prev_comments 3405 3406 if self._match_text_seq("NOT", "MATERIALIZED"): 3407 materialized = False 3408 elif self._match_text_seq("MATERIALIZED"): 3409 materialized = True 3410 else: 3411 materialized = None 3412 3413 cte = self.expression( 3414 exp.CTE, 3415 this=self._parse_wrapped(self._parse_statement), 3416 alias=alias, 3417 materialized=materialized, 3418 comments=comments, 3419 ) 3420 3421 values = cte.this 3422 if isinstance(values, exp.Values): 3423 if values.alias: 3424 cte.set("this", exp.select("*").from_(values)) 3425 else: 3426 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3427 3428 return cte 3429 3430 def _parse_table_alias( 3431 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3432 ) -> t.Optional[exp.TableAlias]: 3433 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3434 # so this section tries to parse the clause version and if it fails, it treats the token 3435 # as an identifier (alias) 3436 if self._can_parse_limit_or_offset(): 3437 return None 3438 3439 any_token = self._match(TokenType.ALIAS) 3440 alias = ( 3441 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3442 or self._parse_string_as_identifier() 3443 ) 3444 3445 index = self._index 3446 if self._match(TokenType.L_PAREN): 3447 columns = self._parse_csv(self._parse_function_parameter) 3448 self._match_r_paren() if columns else self._retreat(index) 3449 else: 3450 columns = None 3451 3452 if not alias and not columns: 3453 return None 3454 3455 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3456 3457 # We bubble up comments from the Identifier to the 
TableAlias 3458 if isinstance(alias, exp.Identifier): 3459 table_alias.add_comments(alias.pop_comments()) 3460 3461 return table_alias 3462 3463 def _parse_subquery( 3464 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3465 ) -> t.Optional[exp.Subquery]: 3466 if not this: 3467 return None 3468 3469 return self.expression( 3470 exp.Subquery, 3471 this=this, 3472 pivots=self._parse_pivots(), 3473 alias=self._parse_table_alias() if parse_alias else None, 3474 sample=self._parse_table_sample(), 3475 ) 3476 3477 def _implicit_unnests_to_explicit(self, this: E) -> E: 3478 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3479 3480 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3481 for i, join in enumerate(this.args.get("joins") or []): 3482 table = join.this 3483 normalized_table = table.copy() 3484 normalized_table.meta["maybe_column"] = True 3485 normalized_table = _norm(normalized_table, dialect=self.dialect) 3486 3487 if isinstance(table, exp.Table) and not join.args.get("on"): 3488 if normalized_table.parts[0].name in refs: 3489 table_as_column = table.to_column() 3490 unnest = exp.Unnest(expressions=[table_as_column]) 3491 3492 # Table.to_column creates a parent Alias node that we want to convert to 3493 # a TableAlias and attach to the Unnest, so it matches the parser's output 3494 if isinstance(table.args.get("alias"), exp.TableAlias): 3495 table_as_column.replace(table_as_column.this) 3496 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3497 3498 table.replace(unnest) 3499 3500 refs.add(normalized_table.alias_or_name) 3501 3502 return this 3503 3504 def _parse_query_modifiers( 3505 self, this: t.Optional[exp.Expression] 3506 ) -> t.Optional[exp.Expression]: 3507 if isinstance(this, self.MODIFIABLES): 3508 for join in self._parse_joins(): 3509 this.append("joins", join) 3510 for lateral in iter(self._parse_lateral, None): 3511 this.append("laterals", 
lateral) 3512 3513 while True: 3514 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3515 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3516 key, expression = parser(self) 3517 3518 if expression: 3519 this.set(key, expression) 3520 if key == "limit": 3521 offset = expression.args.pop("offset", None) 3522 3523 if offset: 3524 offset = exp.Offset(expression=offset) 3525 this.set("offset", offset) 3526 3527 limit_by_expressions = expression.expressions 3528 expression.set("expressions", None) 3529 offset.set("expressions", limit_by_expressions) 3530 continue 3531 break 3532 3533 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3534 this = self._implicit_unnests_to_explicit(this) 3535 3536 return this 3537 3538 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3539 start = self._curr 3540 while self._curr: 3541 self._advance() 3542 3543 end = self._tokens[self._index - 1] 3544 return exp.Hint(expressions=[self._find_sql(start, end)]) 3545 3546 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3547 return self._parse_function_call() 3548 3549 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3550 start_index = self._index 3551 should_fallback_to_string = False 3552 3553 hints = [] 3554 try: 3555 for hint in iter( 3556 lambda: self._parse_csv( 3557 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3558 ), 3559 [], 3560 ): 3561 hints.extend(hint) 3562 except ParseError: 3563 should_fallback_to_string = True 3564 3565 if should_fallback_to_string or self._curr: 3566 self._retreat(start_index) 3567 return self._parse_hint_fallback_to_string() 3568 3569 return self.expression(exp.Hint, expressions=hints) 3570 3571 def _parse_hint(self) -> t.Optional[exp.Hint]: 3572 if self._match(TokenType.HINT) and self._prev_comments: 3573 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3574 3575 return None 3576 3577 def _parse_into(self) -> 
t.Optional[exp.Into]: 3578 if not self._match(TokenType.INTO): 3579 return None 3580 3581 temp = self._match(TokenType.TEMPORARY) 3582 unlogged = self._match_text_seq("UNLOGGED") 3583 self._match(TokenType.TABLE) 3584 3585 return self.expression( 3586 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3587 ) 3588 3589 def _parse_from( 3590 self, 3591 joins: bool = False, 3592 skip_from_token: bool = False, 3593 consume_pipe: bool = False, 3594 ) -> t.Optional[exp.From]: 3595 if not skip_from_token and not self._match(TokenType.FROM): 3596 return None 3597 3598 return self.expression( 3599 exp.From, 3600 comments=self._prev_comments, 3601 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3602 ) 3603 3604 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3605 return self.expression( 3606 exp.MatchRecognizeMeasure, 3607 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3608 this=self._parse_expression(), 3609 ) 3610 3611 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3612 if not self._match(TokenType.MATCH_RECOGNIZE): 3613 return None 3614 3615 self._match_l_paren() 3616 3617 partition = self._parse_partition_by() 3618 order = self._parse_order() 3619 3620 measures = ( 3621 self._parse_csv(self._parse_match_recognize_measure) 3622 if self._match_text_seq("MEASURES") 3623 else None 3624 ) 3625 3626 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3627 rows = exp.var("ONE ROW PER MATCH") 3628 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3629 text = "ALL ROWS PER MATCH" 3630 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3631 text += " SHOW EMPTY MATCHES" 3632 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3633 text += " OMIT EMPTY MATCHES" 3634 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3635 text += " WITH UNMATCHED ROWS" 3636 rows = exp.var(text) 3637 else: 3638 rows = None 3639 3640 if 
self._match_text_seq("AFTER", "MATCH", "SKIP"): 3641 text = "AFTER MATCH SKIP" 3642 if self._match_text_seq("PAST", "LAST", "ROW"): 3643 text += " PAST LAST ROW" 3644 elif self._match_text_seq("TO", "NEXT", "ROW"): 3645 text += " TO NEXT ROW" 3646 elif self._match_text_seq("TO", "FIRST"): 3647 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3648 elif self._match_text_seq("TO", "LAST"): 3649 text += f" TO LAST {self._advance_any().text}" # type: ignore 3650 after = exp.var(text) 3651 else: 3652 after = None 3653 3654 if self._match_text_seq("PATTERN"): 3655 self._match_l_paren() 3656 3657 if not self._curr: 3658 self.raise_error("Expecting )", self._curr) 3659 3660 paren = 1 3661 start = self._curr 3662 3663 while self._curr and paren > 0: 3664 if self._curr.token_type == TokenType.L_PAREN: 3665 paren += 1 3666 if self._curr.token_type == TokenType.R_PAREN: 3667 paren -= 1 3668 3669 end = self._prev 3670 self._advance() 3671 3672 if paren > 0: 3673 self.raise_error("Expecting )", self._curr) 3674 3675 pattern = exp.var(self._find_sql(start, end)) 3676 else: 3677 pattern = None 3678 3679 define = ( 3680 self._parse_csv(self._parse_name_as_expression) 3681 if self._match_text_seq("DEFINE") 3682 else None 3683 ) 3684 3685 self._match_r_paren() 3686 3687 return self.expression( 3688 exp.MatchRecognize, 3689 partition_by=partition, 3690 order=order, 3691 measures=measures, 3692 rows=rows, 3693 after=after, 3694 pattern=pattern, 3695 define=define, 3696 alias=self._parse_table_alias(), 3697 ) 3698 3699 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3700 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3701 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3702 cross_apply = False 3703 3704 if cross_apply is not None: 3705 this = self._parse_select(table=True) 3706 view = None 3707 outer = None 3708 elif self._match(TokenType.LATERAL): 3709 this = self._parse_select(table=True) 3710 view = 
self._match(TokenType.VIEW) 3711 outer = self._match(TokenType.OUTER) 3712 else: 3713 return None 3714 3715 if not this: 3716 this = ( 3717 self._parse_unnest() 3718 or self._parse_function() 3719 or self._parse_id_var(any_token=False) 3720 ) 3721 3722 while self._match(TokenType.DOT): 3723 this = exp.Dot( 3724 this=this, 3725 expression=self._parse_function() or self._parse_id_var(any_token=False), 3726 ) 3727 3728 ordinality: t.Optional[bool] = None 3729 3730 if view: 3731 table = self._parse_id_var(any_token=False) 3732 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3733 table_alias: t.Optional[exp.TableAlias] = self.expression( 3734 exp.TableAlias, this=table, columns=columns 3735 ) 3736 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3737 # We move the alias from the lateral's child node to the lateral itself 3738 table_alias = this.args["alias"].pop() 3739 else: 3740 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3741 table_alias = self._parse_table_alias() 3742 3743 return self.expression( 3744 exp.Lateral, 3745 this=this, 3746 view=view, 3747 outer=outer, 3748 alias=table_alias, 3749 cross_apply=cross_apply, 3750 ordinality=ordinality, 3751 ) 3752 3753 def _parse_join_parts( 3754 self, 3755 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3756 return ( 3757 self._match_set(self.JOIN_METHODS) and self._prev, 3758 self._match_set(self.JOIN_SIDES) and self._prev, 3759 self._match_set(self.JOIN_KINDS) and self._prev, 3760 ) 3761 3762 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3763 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3764 this = self._parse_column() 3765 if isinstance(this, exp.Column): 3766 return this.this 3767 return this 3768 3769 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3770 3771 def _parse_join( 3772 self, skip_join_token: bool = False, parse_bracket: bool = False 3773 ) -> 
t.Optional[exp.Join]: 3774 if self._match(TokenType.COMMA): 3775 table = self._try_parse(self._parse_table) 3776 cross_join = self.expression(exp.Join, this=table) if table else None 3777 3778 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3779 cross_join.set("kind", "CROSS") 3780 3781 return cross_join 3782 3783 index = self._index 3784 method, side, kind = self._parse_join_parts() 3785 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3786 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3787 join_comments = self._prev_comments 3788 3789 if not skip_join_token and not join: 3790 self._retreat(index) 3791 kind = None 3792 method = None 3793 side = None 3794 3795 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3796 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3797 3798 if not skip_join_token and not join and not outer_apply and not cross_apply: 3799 return None 3800 3801 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3802 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3803 kwargs["expressions"] = self._parse_csv( 3804 lambda: self._parse_table(parse_bracket=parse_bracket) 3805 ) 3806 3807 if method: 3808 kwargs["method"] = method.text 3809 if side: 3810 kwargs["side"] = side.text 3811 if kind: 3812 kwargs["kind"] = kind.text 3813 if hint: 3814 kwargs["hint"] = hint 3815 3816 if self._match(TokenType.MATCH_CONDITION): 3817 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3818 3819 if self._match(TokenType.ON): 3820 kwargs["on"] = self._parse_assignment() 3821 elif self._match(TokenType.USING): 3822 kwargs["using"] = self._parse_using_identifiers() 3823 elif ( 3824 not (outer_apply or cross_apply) 3825 and not isinstance(kwargs["this"], exp.Unnest) 3826 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3827 ): 3828 index = 
self._index 3829 joins: t.Optional[list] = list(self._parse_joins()) 3830 3831 if joins and self._match(TokenType.ON): 3832 kwargs["on"] = self._parse_assignment() 3833 elif joins and self._match(TokenType.USING): 3834 kwargs["using"] = self._parse_using_identifiers() 3835 else: 3836 joins = None 3837 self._retreat(index) 3838 3839 kwargs["this"].set("joins", joins if joins else None) 3840 3841 kwargs["pivots"] = self._parse_pivots() 3842 3843 comments = [c for token in (method, side, kind) if token for c in token.comments] 3844 comments = (join_comments or []) + comments 3845 return self.expression(exp.Join, comments=comments, **kwargs) 3846 3847 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3848 this = self._parse_assignment() 3849 3850 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3851 return this 3852 3853 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3854 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3855 3856 return this 3857 3858 def _parse_index_params(self) -> exp.IndexParameters: 3859 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3860 3861 if self._match(TokenType.L_PAREN, advance=False): 3862 columns = self._parse_wrapped_csv(self._parse_with_operator) 3863 else: 3864 columns = None 3865 3866 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3867 partition_by = self._parse_partition_by() 3868 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3869 tablespace = ( 3870 self._parse_var(any_token=True) 3871 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3872 else None 3873 ) 3874 where = self._parse_where() 3875 3876 on = self._parse_field() if self._match(TokenType.ON) else None 3877 3878 return self.expression( 3879 exp.IndexParameters, 3880 using=using, 3881 columns=columns, 3882 include=include, 3883 partition_by=partition_by, 3884 where=where, 3885 
with_storage=with_storage, 3886 tablespace=tablespace, 3887 on=on, 3888 ) 3889 3890 def _parse_index( 3891 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3892 ) -> t.Optional[exp.Index]: 3893 if index or anonymous: 3894 unique = None 3895 primary = None 3896 amp = None 3897 3898 self._match(TokenType.ON) 3899 self._match(TokenType.TABLE) # hive 3900 table = self._parse_table_parts(schema=True) 3901 else: 3902 unique = self._match(TokenType.UNIQUE) 3903 primary = self._match_text_seq("PRIMARY") 3904 amp = self._match_text_seq("AMP") 3905 3906 if not self._match(TokenType.INDEX): 3907 return None 3908 3909 index = self._parse_id_var() 3910 table = None 3911 3912 params = self._parse_index_params() 3913 3914 return self.expression( 3915 exp.Index, 3916 this=index, 3917 table=table, 3918 unique=unique, 3919 primary=primary, 3920 amp=amp, 3921 params=params, 3922 ) 3923 3924 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3925 hints: t.List[exp.Expression] = [] 3926 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3927 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3928 hints.append( 3929 self.expression( 3930 exp.WithTableHint, 3931 expressions=self._parse_csv( 3932 lambda: self._parse_function() or self._parse_var(any_token=True) 3933 ), 3934 ) 3935 ) 3936 self._match_r_paren() 3937 else: 3938 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3939 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3940 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3941 3942 self._match_set((TokenType.INDEX, TokenType.KEY)) 3943 if self._match(TokenType.FOR): 3944 hint.set("target", self._advance_any() and self._prev.text.upper()) 3945 3946 hint.set("expressions", self._parse_wrapped_id_vars()) 3947 hints.append(hint) 3948 3949 return hints or None 3950 3951 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3952 return ( 3953 (not 
def _parse_table_parts(
    self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
) -> exp.Table:
    """Parse a (possibly dot-qualified) table reference into an `exp.Table`.

    Parts are read with `_parse_table_part` and separated by DOT tokens. The last
    three parts become catalog / db / table; any further parts are nested into
    `exp.Dot` expressions stored as the table.

    Args:
        schema: Forwarded to `_parse_table_part`.
        is_db_reference: When set, the parsed parts are shifted one level up, so
            the last part becomes the db and the resulting table has no name.
        wildcard: When set, a STAR token following the name may be folded into the
            table identifier (or become the identifier if there is none).

    Returns:
        The `exp.Table` node, with "changes", "when" and "pivots" args set when the
        corresponding clauses were parsed after the name.
    """
    catalog = None
    db = None
    table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

    while self._match(TokenType.DOT):
        if catalog:
            # This allows nesting the table in arbitrarily many dot expressions if needed
            table = self.expression(
                exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
            )
        else:
            # Shift the parts collected so far one level up before reading the next one
            catalog = db
            db = table
            # "" used for tsql FROM a..b case
            table = self._parse_table_part(schema=schema) or ""

    # NOTE(review): `_is_connected` presumably requires the `*` to be adjacent to the
    # previous token - its definition is not visible here, confirm.
    if (
        wildcard
        and self._is_connected()
        and (isinstance(table, exp.Identifier) or not table)
        and self._match(TokenType.STAR)
    ):
        if isinstance(table, exp.Identifier):
            # Mutates the identifier in place: its name gains a trailing "*"
            table.args["this"] += "*"
        else:
            table = exp.Identifier(this="*")

    # We bubble up comments from the Identifier to the Table
    comments = table.pop_comments() if isinstance(table, exp.Expression) else None

    if is_db_reference:
        catalog = db
        db = table
        table = None

    # Both checks report through `self.raise_error`; parsing continues below if it returns
    if not table and not is_db_reference:
        self.raise_error(f"Expected table name but got {self._curr}")
    if not db and is_db_reference:
        self.raise_error(f"Expected database name but got {self._curr}")

    table = self.expression(
        exp.Table,
        comments=comments,
        this=table,
        db=db,
        catalog=catalog,
    )

    # Optional trailing clauses; each arg is only set when its parser returned something truthy
    changes = self._parse_changes()
    if changes:
        table.set("changes", changes)

    at_before = self._parse_historical_data()
    if at_before:
        table.set("when", at_before)

    pivots = self._parse_pivots()
    if pivots:
        table.set("pivots", pivots)

    return table
def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a snapshot / time-travel clause into an `exp.Version` node.

    The clause starts with a TIMESTAMP_SNAPSHOT or VERSION_SNAPSHOT token and is
    followed by one of these forms:

    - FROM <start> TO <end> / BETWEEN <start> AND <end>: kind is the matched
      keyword, expression is a tuple of both bounds;
    - CONTAINED IN (<a>, <b>, ...): expression is a tuple of the wrapped list;
    - ALL: no expression;
    - anything else: treated as AS OF (the words themselves are optional),
      with the expression read by `_parse_type`.

    Returns:
        The `exp.Version` node, or None if neither snapshot token is next.
    """
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        snapshot_type = "TIMESTAMP"
    elif self._match(TokenType.VERSION_SNAPSHOT):
        snapshot_type = "VERSION"
    else:
        return None

    bounds: t.Optional[exp.Expression] = None

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        mode = self._prev.text.upper()
        lower = self._parse_bitwise()
        # The separator is optional as far as the parser is concerned
        self._match_texts(("TO", "AND"))
        upper = self._parse_bitwise()
        bounds = self.expression(exp.Tuple, expressions=[lower, upper])
    elif self._match_text_seq("CONTAINED", "IN"):
        mode = "CONTAINED IN"
        members = self._parse_wrapped_csv(self._parse_bitwise)
        bounds = self.expression(exp.Tuple, expressions=members)
    elif self._match(TokenType.ALL):
        mode = "ALL"
    else:
        self._match_text_seq("AS", "OF")
        mode = "AS OF"
        bounds = self._parse_type()

    return self.expression(exp.Version, this=snapshot_type, expression=bounds, kind=mode)
def _parse_changes(self) -> t.Optional[exp.Changes]:
    """Parse a `CHANGES (INFORMATION => <var>)` clause into an `exp.Changes` node.

    After the parenthesised part, up to two historical-data clauses are read with
    `_parse_historical_data`: the first is stored as `at_before`, the second as `end`.

    Returns:
        The `exp.Changes` node, or None if the `CHANGES ( INFORMATION =>` prefix
        is not present.
    """
    if self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
        info = self._parse_var(any_token=True)
        self._match_r_paren()

        # Order matters: the first clause found is the start, the second the end
        range_start = self._parse_historical_data()
        range_end = self._parse_historical_data()

        return self.expression(
            exp.Changes, information=info, at_before=range_start, end=range_end
        )

    return None
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a table sampling clause into an `exp.TableSample` node.

    The clause must start with a TABLE_SAMPLE token or, when `as_modifier` is set,
    with the words USING SAMPLE.

    Args:
        as_modifier: Whether the `USING SAMPLE` spelling is accepted as well.

    Returns:
        The parsed `exp.TableSample`, or None if no sampling clause starts here.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    # An optional sampling method may precede the (optional) opening parenthesis
    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        # The sample arguments are a comma-separated list; there is no single quantity
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        # Peek (advance=False) so that a leading number is parsed as a full factor
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # BUCKET <numerator> [OUT OF] <denominator> [ON] <field>
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        # NOTE(review): MOD is presumably how a literal `%` is tokenized - confirm
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        # No unit given and the dialect interprets a bare quantity as a percentage
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # A second parenthesised group: (<method> [, <seed>]); it replaces any earlier method
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )
def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a parenthesised PIVOT / UNPIVOT clause into an `exp.Pivot` node.

    For PIVOT the parenthesised part starts with a list of aggregations, for
    UNPIVOT with a list of columns; both are followed by FOR and one or more
    `<column> IN (...)` fields. For PIVOT the names of the produced columns are
    also inferred and stored under "columns".

    Returns:
        The `exp.Pivot` node, or None if no PIVOT/UNPIVOT token is next or if it
        is not followed by an opening parenthesis.
    """
    # Remember where we started so the keyword can be un-consumed if no "(" follows
    index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        unpivot = False
    elif self._match(TokenType.UNPIVOT):
        unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    # Placeholder only; reassigned below before it is read
    expressions = []

    if not self._match(TokenType.L_PAREN):
        self._retreat(index)
        return None

    if unpivot:
        expressions = self._parse_csv(self._parse_column)
    else:
        expressions = self._parse_csv(self._parse_pivot_aggregation)

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    # Collect `<column> IN (...)` fields until one fails to parse
    fields = []
    while True:
        field = self._try_parse(self._parse_pivot_in)
        if not field:
            break
        fields.append(field)

    default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
        self._parse_bitwise
    )

    group = self._parse_group()

    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        fields=fields,
        unpivot=unpivot,
        include_nulls=include_nulls,
        default_on_null=default_on_null,
        group=group,
    )

    # Only look for an alias when the next token does not start another PIVOT/UNPIVOT
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if not unpivot:
        # Aliases of the aggregations, used as a name component of the output columns
        names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

        columns: t.List[exp.Expression] = []
        all_fields = []
        for pivot_field in pivot.fields:
            pivot_field_expressions = pivot_field.expressions

            # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
            if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                continue

            all_fields.append(
                [
                    fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                    for fld in pivot_field_expressions
                ]
            )

        if all_fields:
            if names:
                all_fields.append(names)

            # Generate all possible combinations of the pivot columns
            # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
            # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
            for fld_parts_tuple in itertools.product(*all_fields):
                fld_parts = list(fld_parts_tuple)

                if names and self.PREFIXED_PIVOT_COLUMNS:
                    # Move the "name" to the front of the list
                    fld_parts.insert(0, fld_parts.pop(-1))

                columns.append(exp.to_identifier("_".join(fld_parts)))

        # Set even when empty, i.e. when nothing could be inferred
        pivot.set("columns", columns)

    return pivot
def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
    """Parse one element of a GROUPING SETS list.

    Returns:
        An `exp.Tuple` of columns when the element is parenthesised, otherwise
        whatever `_parse_column` returns for a single column.
    """
    if not self._match(TokenType.L_PAREN):
        return self._parse_column()

    members = self._parse_csv(self._parse_column)
    self._match_r_paren()
    return self.expression(exp.Tuple, expressions=members)
skip_having_token: bool = False) -> t.Optional[exp.Having]: 4570 if not skip_having_token and not self._match(TokenType.HAVING): 4571 return None 4572 return self.expression( 4573 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4574 ) 4575 4576 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4577 if not self._match(TokenType.QUALIFY): 4578 return None 4579 return self.expression(exp.Qualify, this=self._parse_assignment()) 4580 4581 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4582 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4583 exp.Prior, this=self._parse_bitwise() 4584 ) 4585 connect = self._parse_assignment() 4586 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4587 return connect 4588 4589 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4590 if skip_start_token: 4591 start = None 4592 elif self._match(TokenType.START_WITH): 4593 start = self._parse_assignment() 4594 else: 4595 return None 4596 4597 self._match(TokenType.CONNECT_BY) 4598 nocycle = self._match_text_seq("NOCYCLE") 4599 connect = self._parse_connect_with_prior() 4600 4601 if not start and self._match(TokenType.START_WITH): 4602 start = self._parse_assignment() 4603 4604 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4605 4606 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4607 this = self._parse_id_var(any_token=True) 4608 if self._match(TokenType.ALIAS): 4609 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4610 return this 4611 4612 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4613 if self._match_text_seq("INTERPOLATE"): 4614 return self._parse_wrapped_csv(self._parse_name_as_expression) 4615 return None 4616 4617 def _parse_order( 4618 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4619 ) -> t.Optional[exp.Expression]: 4620 siblings = None 4621 if 
def _parse_ordered(
    self, parse_method: t.Optional[t.Callable] = None
) -> t.Optional[exp.Ordered]:
    """Parse one ORDER BY term, including direction, null ordering and WITH FILL.

    Args:
        parse_method: Callable used to parse the ordered expression; defaults to
            `self._parse_assignment`.

    Returns:
        The `exp.Ordered` node, or None if no expression could be parsed.
    """
    this = parse_method() if parse_method else self._parse_assignment()
    if not this:
        return None

    # In dialects that support it, ORDER BY ALL is represented by a Var rather than a column
    if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
        this = exp.var("ALL")

    asc = self._match(TokenType.ASC)
    # `asc and False` evaluates to False when ASC was matched and otherwise to the
    # falsy result of `_match`, so `desc` can tell an explicit ASC apart from no
    # direction at all.
    desc = self._match(TokenType.DESC) or (asc and False)

    is_nulls_first = self._match_text_seq("NULLS", "FIRST")
    is_nulls_last = self._match_text_seq("NULLS", "LAST")

    nulls_first = is_nulls_first or False
    explicitly_null_ordered = is_nulls_first or is_nulls_last

    # Without an explicit NULLS FIRST/LAST, derive the null position from the dialect:
    # "nulls_are_small" puts nulls first when ascending, any other setting puts them
    # first when descending - except "nulls_are_last", which never puts them first.
    if (
        not explicitly_null_ordered
        and (
            (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
            or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
        )
        and self.dialect.NULL_ORDERING != "nulls_are_last"
    ):
        nulls_first = True

    if self._match_text_seq("WITH", "FILL"):
        # Each part is optional; dict values are evaluated in order, so the parts
        # are expected as FROM, TO, STEP, INTERPOLATE
        with_fill = self.expression(
            exp.WithFill,
            **{  # type: ignore
                "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                "to": self._match_text_seq("TO") and self._parse_bitwise(),
                "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                "interpolate": self._parse_interpolate(),
            },
        )
    else:
        with_fill = None

    return self.expression(
        exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
    )
def _parse_locks(self) -> t.List[exp.Lock]:
    """Parse zero or more consecutive row-locking clauses.

    Recognised forms and the flags they produce:

    - FOR UPDATE: update=True
    - FOR SHARE / LOCK IN SHARE MODE: update=False
    - FOR KEY SHARE: update=False, key=True
    - FOR NO KEY UPDATE: update=True, key=True

    Each may be followed by `OF <tables>` and by one of NOWAIT (wait=True),
    `WAIT <primary>` (wait=<expression>) or SKIP LOCKED (wait=False).

    Returns:
        One `exp.Lock` per clause, in source order; empty if there is none.
    """
    parsed = []

    while True:
        is_key = None

        if self._match_text_seq("FOR", "UPDATE"):
            is_update = True
        elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
            "LOCK", "IN", "SHARE", "MODE"
        ):
            is_update = False
        elif self._match_text_seq("FOR", "KEY", "SHARE"):
            is_update = False
            is_key = True
        elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
            is_update = True
            is_key = True
        else:
            # No further locking clause
            return parsed

        targets = (
            self._parse_csv(lambda: self._parse_table(schema=True))
            if self._match_text_seq("OF")
            else None
        )

        if self._match_text_seq("NOWAIT"):
            wait_spec = True
        elif self._match_text_seq("WAIT"):
            wait_spec = self._parse_primary()
        elif self._match_text_seq("SKIP", "LOCKED"):
            wait_spec = False
        else:
            wait_spec = None

        parsed.append(
            self.expression(
                exp.Lock, update=is_update, expressions=targets, wait=wait_spec, key=is_key
            )
        )
def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Fold any following set operations onto `this`.

    `parse_set_operation` is applied repeatedly, each result becoming the left
    operand of the next, until it returns nothing. If the final result is a set
    operation and `MODIFIERS_ATTACHED_TO_SET_OP` is set, every modifier named in
    `SET_OP_MODIFIERS` that is present on its right-hand operand is moved onto the
    set operation itself.

    Args:
        this: The expression parsed so far (left operand of the first operation).

    Returns:
        The combined expression, or `this` unchanged if no set operation follows.
    """
    while this:
        combined = self.parse_set_operation(this)
        if not combined:
            break
        this = combined

    if not isinstance(this, exp.SetOperation) or not self.MODIFIERS_ATTACHED_TO_SET_OP:
        return this

    right_operand = this.expression
    if not right_operand:
        return this

    for modifier in self.SET_OP_MODIFIERS:
        node = right_operand.args.get(modifier)
        if node:
            # `pop` detaches the modifier from the operand before re-attaching it
            this.set(modifier, node.pop())

    return this
def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse what follows an IS token that the caller has already consumed.

    Handles, each optionally preceded by NOT:

    - DISTINCT FROM <expr>: `exp.NullSafeNEQ`, or `exp.NullSafeEQ` when negated;
    - JSON [<kind>] [WITH | WITHOUT] [UNIQUE] [KEYS]: `exp.Is` against an `exp.JSON`;
    - a primary expression or NULL: `exp.Is` against it.

    Args:
        this: The left-hand side of the IS predicate.

    Returns:
        The resulting expression (wrapped in `exp.Not` when negated, except for
        the DISTINCT FROM form), or None after retreating to just before the IS
        token when nothing valid follows.
    """
    # One token back from the current position, i.e. the IS the caller just matched
    before_is = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        if negated:
            null_safe_cls = exp.NullSafeEQ
        else:
            null_safe_cls = exp.NullSafeNEQ
        return self.expression(null_safe_cls, this=this, expression=self._parse_bitwise())

    if not self._match(TokenType.JSON):
        rhs = self._parse_primary() or self._parse_null()
        if not rhs:
            self._retreat(before_is)
            return None
    else:
        json_kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

        if self._match_text_seq("WITH"):
            with_keys = True
        elif self._match_text_seq("WITHOUT"):
            with_keys = False
        else:
            with_keys = None

        unique_keys = self._match(TokenType.UNIQUE)
        self._match_text_seq("KEYS")
        rhs = self.expression(
            exp.JSON, **{"this": json_kind, "with": with_keys, "unique": unique_keys}
        )

    is_node = self.expression(exp.Is, this=this, expression=rhs)
    if negated:
        return self.expression(exp.Not, this=is_node)
    return is_node
def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
    """Parse an INTERVAL expression, normalising it to the `INTERVAL '<value>' <unit>` form.

    Args:
        match_interval: Whether a leading INTERVAL token is required. The recursive
            call below passes False to parse the continuation of a multi-part
            interval, which has no keyword of its own.

    Returns:
        An `exp.Interval`, an `exp.Add` chaining several intervals, or None (after
        retreating to the starting position where needed) if this is not an interval.
    """
    index = self._index

    # An INTERVAL token is consumed if present even when it is not required
    if not self._match(TokenType.INTERVAL) and match_interval:
        return None

    if self._match(TokenType.STRING, advance=False):
        this = self._parse_primary()
    else:
        this = self._parse_term()

    # NOTE(review): an unquoted, unqualified column named IS presumably means the input
    # was `interval IS ...` with `interval` used as an identifier - confirm.
    if not this or (
        isinstance(this, exp.Column)
        and not this.table
        and not this.this.quoted
        and this.name.upper() == "IS"
    ):
        self._retreat(index)
        return None

    # The unit is parsed eagerly; an upcoming alias token is not taken as a unit
    unit = self._parse_function() or (
        not self._match(TokenType.ALIAS, advance=False)
        and self._parse_var(any_token=True, upper=True)
    )

    # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
    # each INTERVAL expression into this canonical form so it's easy to transpile
    if this and this.is_number:
        this = exp.Literal.string(this.to_py())
    elif this and this.is_string:
        parts = exp.INTERVAL_STRING_RE.findall(this.name)
        if parts and unit:
            # Unconsume the eagerly-parsed unit, since the real unit was part of the string
            # NOTE(review): steps back exactly one token, so this assumes the unit was a single token
            unit = None
            self._retreat(self._index - 1)

        if len(parts) == 1:
            # Split the single "<value> <unit>" match into a string literal and a Var unit
            this = exp.Literal.string(parts[0][0])
            unit = self.expression(exp.Var, this=parts[0][1].upper())
    if self.INTERVAL_SPANS and self._match_text_seq("TO"):
        # <unit> TO <unit>
        unit = self.expression(
            exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
        )

    interval = self.expression(exp.Interval, this=this, unit=unit)

    # Tentatively consume a "+"; undone below unless another interval part follows
    index = self._index
    self._match(TokenType.PLUS)

    # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
    if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
        return self.expression(
            exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
        )

    self._retreat(index)
    return interval
exp.Identifier): 5145 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5146 5147 return this 5148 5149 def _parse_factor(self) -> t.Optional[exp.Expression]: 5150 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5151 this = parse_method() 5152 5153 while self._match_set(self.FACTOR): 5154 klass = self.FACTOR[self._prev.token_type] 5155 comments = self._prev_comments 5156 expression = parse_method() 5157 5158 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5159 self._retreat(self._index - 1) 5160 return this 5161 5162 this = self.expression(klass, this=this, comments=comments, expression=expression) 5163 5164 if isinstance(this, exp.Div): 5165 this.args["typed"] = self.dialect.TYPED_DIVISION 5166 this.args["safe"] = self.dialect.SAFE_DIVISION 5167 5168 return this 5169 5170 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5171 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5172 5173 def _parse_unary(self) -> t.Optional[exp.Expression]: 5174 if self._match_set(self.UNARY_PARSERS): 5175 return self.UNARY_PARSERS[self._prev.token_type](self) 5176 return self._parse_at_time_zone(self._parse_type()) 5177 5178 def _parse_type( 5179 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5180 ) -> t.Optional[exp.Expression]: 5181 interval = parse_interval and self._parse_interval() 5182 if interval: 5183 return interval 5184 5185 index = self._index 5186 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5187 5188 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
5189 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5190 if isinstance(data_type, exp.Cast): 5191 # This constructor can contain ops directly after it, for instance struct unnesting: 5192 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5193 return self._parse_column_ops(data_type) 5194 5195 if data_type: 5196 index2 = self._index 5197 this = self._parse_primary() 5198 5199 if isinstance(this, exp.Literal): 5200 literal = this.name 5201 this = self._parse_column_ops(this) 5202 5203 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5204 if parser: 5205 return parser(self, this, data_type) 5206 5207 if ( 5208 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5209 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5210 and TIME_ZONE_RE.search(literal) 5211 ): 5212 data_type = exp.DataType.build("TIMESTAMPTZ") 5213 5214 return self.expression(exp.Cast, this=this, to=data_type) 5215 5216 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5217 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5218 # 5219 # If the index difference here is greater than 1, that means the parser itself must have 5220 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5221 # 5222 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5223 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5224 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5225 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5226 # 5227 # In these cases, we don't really want to return the converted type, but instead retreat 5228 # and try to parse a Column or Identifier in the section below. 
5229 if data_type.expressions and index2 - index > 1: 5230 self._retreat(index2) 5231 return self._parse_column_ops(data_type) 5232 5233 self._retreat(index) 5234 5235 if fallback_to_identifier: 5236 return self._parse_id_var() 5237 5238 this = self._parse_column() 5239 return this and self._parse_column_ops(this) 5240 5241 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5242 this = self._parse_type() 5243 if not this: 5244 return None 5245 5246 if isinstance(this, exp.Column) and not this.table: 5247 this = exp.var(this.name.upper()) 5248 5249 return self.expression( 5250 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5251 ) 5252 5253 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5254 type_name = identifier.name 5255 5256 while self._match(TokenType.DOT): 5257 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5258 5259 return exp.DataType.build(type_name, udt=True) 5260 5261 def _parse_types( 5262 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5263 ) -> t.Optional[exp.Expression]: 5264 index = self._index 5265 5266 this: t.Optional[exp.Expression] = None 5267 prefix = self._match_text_seq("SYSUDTLIB", ".") 5268 5269 if not self._match_set(self.TYPE_TOKENS): 5270 identifier = allow_identifiers and self._parse_id_var( 5271 any_token=False, tokens=(TokenType.VAR,) 5272 ) 5273 if isinstance(identifier, exp.Identifier): 5274 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5275 5276 if len(tokens) != 1: 5277 self.raise_error("Unexpected identifier", self._prev) 5278 5279 if tokens[0].token_type in self.TYPE_TOKENS: 5280 self._prev = tokens[0] 5281 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5282 this = self._parse_user_defined_type(identifier) 5283 else: 5284 self._retreat(self._index - 1) 5285 return None 5286 else: 5287 return None 5288 5289 type_token = self._prev.token_type 5290 5291 if type_token == 
TokenType.PSEUDO_TYPE: 5292 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5293 5294 if type_token == TokenType.OBJECT_IDENTIFIER: 5295 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5296 5297 # https://materialize.com/docs/sql/types/map/ 5298 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5299 key_type = self._parse_types( 5300 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5301 ) 5302 if not self._match(TokenType.FARROW): 5303 self._retreat(index) 5304 return None 5305 5306 value_type = self._parse_types( 5307 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5308 ) 5309 if not self._match(TokenType.R_BRACKET): 5310 self._retreat(index) 5311 return None 5312 5313 return exp.DataType( 5314 this=exp.DataType.Type.MAP, 5315 expressions=[key_type, value_type], 5316 nested=True, 5317 prefix=prefix, 5318 ) 5319 5320 nested = type_token in self.NESTED_TYPE_TOKENS 5321 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5322 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5323 expressions = None 5324 maybe_func = False 5325 5326 if self._match(TokenType.L_PAREN): 5327 if is_struct: 5328 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5329 elif nested: 5330 expressions = self._parse_csv( 5331 lambda: self._parse_types( 5332 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5333 ) 5334 ) 5335 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5336 this = expressions[0] 5337 this.set("nullable", True) 5338 self._match_r_paren() 5339 return this 5340 elif type_token in self.ENUM_TYPE_TOKENS: 5341 expressions = self._parse_csv(self._parse_equality) 5342 elif is_aggregate: 5343 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5344 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5345 ) 5346 if not func_or_ident: 5347 return None 5348 expressions = 
[func_or_ident] 5349 if self._match(TokenType.COMMA): 5350 expressions.extend( 5351 self._parse_csv( 5352 lambda: self._parse_types( 5353 check_func=check_func, 5354 schema=schema, 5355 allow_identifiers=allow_identifiers, 5356 ) 5357 ) 5358 ) 5359 else: 5360 expressions = self._parse_csv(self._parse_type_size) 5361 5362 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5363 if type_token == TokenType.VECTOR and len(expressions) == 2: 5364 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5365 5366 if not expressions or not self._match(TokenType.R_PAREN): 5367 self._retreat(index) 5368 return None 5369 5370 maybe_func = True 5371 5372 values: t.Optional[t.List[exp.Expression]] = None 5373 5374 if nested and self._match(TokenType.LT): 5375 if is_struct: 5376 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5377 else: 5378 expressions = self._parse_csv( 5379 lambda: self._parse_types( 5380 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5381 ) 5382 ) 5383 5384 if not self._match(TokenType.GT): 5385 self.raise_error("Expecting >") 5386 5387 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5388 values = self._parse_csv(self._parse_assignment) 5389 if not values and is_struct: 5390 values = None 5391 self._retreat(self._index - 1) 5392 else: 5393 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5394 5395 if type_token in self.TIMESTAMPS: 5396 if self._match_text_seq("WITH", "TIME", "ZONE"): 5397 maybe_func = False 5398 tz_type = ( 5399 exp.DataType.Type.TIMETZ 5400 if type_token in self.TIMES 5401 else exp.DataType.Type.TIMESTAMPTZ 5402 ) 5403 this = exp.DataType(this=tz_type, expressions=expressions) 5404 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5405 maybe_func = False 5406 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5407 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5408 
maybe_func = False 5409 elif type_token == TokenType.INTERVAL: 5410 unit = self._parse_var(upper=True) 5411 if unit: 5412 if self._match_text_seq("TO"): 5413 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5414 5415 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5416 else: 5417 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5418 elif type_token == TokenType.VOID: 5419 this = exp.DataType(this=exp.DataType.Type.NULL) 5420 5421 if maybe_func and check_func: 5422 index2 = self._index 5423 peek = self._parse_string() 5424 5425 if not peek: 5426 self._retreat(index) 5427 return None 5428 5429 self._retreat(index2) 5430 5431 if not this: 5432 if self._match_text_seq("UNSIGNED"): 5433 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5434 if not unsigned_type_token: 5435 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5436 5437 type_token = unsigned_type_token or type_token 5438 5439 this = exp.DataType( 5440 this=exp.DataType.Type[type_token.value], 5441 expressions=expressions, 5442 nested=nested, 5443 prefix=prefix, 5444 ) 5445 5446 # Empty arrays/structs are allowed 5447 if values is not None: 5448 cls = exp.Struct if is_struct else exp.Array 5449 this = exp.cast(cls(expressions=values), this, copy=False) 5450 5451 elif expressions: 5452 this.set("expressions", expressions) 5453 5454 # https://materialize.com/docs/sql/types/list/#type-name 5455 while self._match(TokenType.LIST): 5456 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5457 5458 index = self._index 5459 5460 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5461 matched_array = self._match(TokenType.ARRAY) 5462 5463 while self._curr: 5464 datatype_token = self._prev.token_type 5465 matched_l_bracket = self._match(TokenType.L_BRACKET) 5466 5467 if (not matched_l_bracket and not matched_array) or ( 5468 datatype_token == 
TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5469 ): 5470 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5471 # not to be confused with the fixed size array parsing 5472 break 5473 5474 matched_array = False 5475 values = self._parse_csv(self._parse_assignment) or None 5476 if ( 5477 values 5478 and not schema 5479 and ( 5480 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5481 ) 5482 ): 5483 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5484 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5485 self._retreat(index) 5486 break 5487 5488 this = exp.DataType( 5489 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5490 ) 5491 self._match(TokenType.R_BRACKET) 5492 5493 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5494 converter = self.TYPE_CONVERTERS.get(this.this) 5495 if converter: 5496 this = converter(t.cast(exp.DataType, this)) 5497 5498 return this 5499 5500 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5501 index = self._index 5502 5503 if ( 5504 self._curr 5505 and self._next 5506 and self._curr.token_type in self.TYPE_TOKENS 5507 and self._next.token_type in self.TYPE_TOKENS 5508 ): 5509 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5510 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5511 this = self._parse_id_var() 5512 else: 5513 this = ( 5514 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5515 or self._parse_id_var() 5516 ) 5517 5518 self._match(TokenType.COLON) 5519 5520 if ( 5521 type_required 5522 and not isinstance(this, exp.DataType) 5523 and not self._match_set(self.TYPE_TOKENS, advance=False) 5524 ): 5525 self._retreat(index) 5526 return self._parse_types() 5527 5528 return self._parse_column_def(this) 5529 5530 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5531 if not self._match_text_seq("AT", "TIME", "ZONE"): 5532 return this 5533 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5534 5535 def _parse_column(self) -> t.Optional[exp.Expression]: 5536 this = self._parse_column_reference() 5537 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5538 5539 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5540 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5541 5542 return column 5543 5544 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5545 this = self._parse_field() 5546 if ( 5547 not this 5548 and self._match(TokenType.VALUES, advance=False) 5549 and self.VALUES_FOLLOWED_BY_PAREN 5550 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5551 ): 5552 this = self._parse_id_var() 5553 5554 if isinstance(this, exp.Identifier): 5555 # We bubble up comments from the Identifier to the Column 5556 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5557 5558 return this 5559 5560 def _parse_colon_as_variant_extract( 5561 self, this: t.Optional[exp.Expression] 5562 ) -> t.Optional[exp.Expression]: 5563 casts = [] 5564 json_path = [] 5565 escape = None 5566 5567 while self._match(TokenType.COLON): 5568 start_index = self._index 5569 5570 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5571 path = self._parse_column_ops( 5572 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5573 ) 5574 5575 # The cast :: operator has a lower precedence than the extraction operator :, so 5576 # we rearrange the AST appropriately to avoid casting the JSON path 5577 while isinstance(path, exp.Cast): 5578 casts.append(path.to) 5579 path = path.this 5580 5581 if casts: 5582 dcolon_offset = next( 5583 i 5584 for i, t in enumerate(self._tokens[start_index:]) 5585 if t.token_type == TokenType.DCOLON 5586 ) 5587 end_token = self._tokens[start_index + dcolon_offset - 1] 5588 else: 5589 end_token = self._prev 5590 5591 if path: 5592 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5593 # it'll roundtrip to a string literal in GET_PATH 5594 if isinstance(path, exp.Identifier) and path.quoted: 5595 escape = True 5596 5597 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5598 5599 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5600 # Databricks transforms it back to the colon/dot notation 5601 if json_path: 5602 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5603 5604 if json_path_expr: 5605 json_path_expr.set("escape", escape) 5606 5607 this = self.expression( 5608 exp.JSONExtract, 5609 this=this, 5610 expression=json_path_expr, 5611 variant_extract=True, 5612 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5613 ) 5614 5615 while casts: 5616 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5617 5618 return this 5619 5620 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5621 return self._parse_types() 5622 5623 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5624 this = self._parse_bracket(this) 5625 5626 while self._match_set(self.COLUMN_OPERATORS): 5627 
op_token = self._prev.token_type 5628 op = self.COLUMN_OPERATORS.get(op_token) 5629 5630 if op_token in self.CAST_COLUMN_OPERATORS: 5631 field = self._parse_dcolon() 5632 if not field: 5633 self.raise_error("Expected type") 5634 elif op and self._curr: 5635 field = self._parse_column_reference() or self._parse_bracket() 5636 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5637 field = self._parse_column_ops(field) 5638 else: 5639 field = self._parse_field(any_token=True, anonymous_func=True) 5640 5641 # Function calls can be qualified, e.g., x.y.FOO() 5642 # This converts the final AST to a series of Dots leading to the function call 5643 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5644 if isinstance(field, (exp.Func, exp.Window)) and this: 5645 this = this.transform( 5646 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5647 ) 5648 5649 if op: 5650 this = op(self, this, field) 5651 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5652 this = self.expression( 5653 exp.Column, 5654 comments=this.comments, 5655 this=field, 5656 table=this.this, 5657 db=this.args.get("table"), 5658 catalog=this.args.get("db"), 5659 ) 5660 elif isinstance(field, exp.Window): 5661 # Move the exp.Dot's to the window's function 5662 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5663 field.set("this", window_func) 5664 this = field 5665 else: 5666 this = self.expression(exp.Dot, this=this, expression=field) 5667 5668 if field and field.comments: 5669 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5670 5671 this = self._parse_bracket(this) 5672 5673 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5674 5675 def _parse_paren(self) -> t.Optional[exp.Expression]: 5676 if not self._match(TokenType.L_PAREN): 5677 return None 5678 5679 comments = self._prev_comments 5680 
query = self._parse_select() 5681 5682 if query: 5683 expressions = [query] 5684 else: 5685 expressions = self._parse_expressions() 5686 5687 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5688 5689 if not this and self._match(TokenType.R_PAREN, advance=False): 5690 this = self.expression(exp.Tuple) 5691 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5692 this = self._parse_subquery(this=this, parse_alias=False) 5693 elif isinstance(this, exp.Subquery): 5694 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5695 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5696 this = self.expression(exp.Tuple, expressions=expressions) 5697 else: 5698 this = self.expression(exp.Paren, this=this) 5699 5700 if this: 5701 this.add_comments(comments) 5702 5703 self._match_r_paren(expression=this) 5704 return this 5705 5706 def _parse_primary(self) -> t.Optional[exp.Expression]: 5707 if self._match_set(self.PRIMARY_PARSERS): 5708 token_type = self._prev.token_type 5709 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5710 5711 if token_type == TokenType.STRING: 5712 expressions = [primary] 5713 while self._match(TokenType.STRING): 5714 expressions.append(exp.Literal.string(self._prev.text)) 5715 5716 if len(expressions) > 1: 5717 return self.expression(exp.Concat, expressions=expressions) 5718 5719 return primary 5720 5721 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5722 return exp.Literal.number(f"0.{self._prev.text}") 5723 5724 return self._parse_paren() 5725 5726 def _parse_field( 5727 self, 5728 any_token: bool = False, 5729 tokens: t.Optional[t.Collection[TokenType]] = None, 5730 anonymous_func: bool = False, 5731 ) -> t.Optional[exp.Expression]: 5732 if anonymous_func: 5733 field = ( 5734 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5735 or self._parse_primary() 5736 ) 5737 else: 5738 field = self._parse_primary() or self._parse_function( 5739 
anonymous=anonymous_func, any_token=any_token 5740 ) 5741 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5742 5743 def _parse_function( 5744 self, 5745 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5746 anonymous: bool = False, 5747 optional_parens: bool = True, 5748 any_token: bool = False, 5749 ) -> t.Optional[exp.Expression]: 5750 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5751 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5752 fn_syntax = False 5753 if ( 5754 self._match(TokenType.L_BRACE, advance=False) 5755 and self._next 5756 and self._next.text.upper() == "FN" 5757 ): 5758 self._advance(2) 5759 fn_syntax = True 5760 5761 func = self._parse_function_call( 5762 functions=functions, 5763 anonymous=anonymous, 5764 optional_parens=optional_parens, 5765 any_token=any_token, 5766 ) 5767 5768 if fn_syntax: 5769 self._match(TokenType.R_BRACE) 5770 5771 return func 5772 5773 def _parse_function_call( 5774 self, 5775 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5776 anonymous: bool = False, 5777 optional_parens: bool = True, 5778 any_token: bool = False, 5779 ) -> t.Optional[exp.Expression]: 5780 if not self._curr: 5781 return None 5782 5783 comments = self._curr.comments 5784 prev = self._prev 5785 token = self._curr 5786 token_type = self._curr.token_type 5787 this = self._curr.text 5788 upper = this.upper() 5789 5790 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5791 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5792 self._advance() 5793 return self._parse_window(parser(self)) 5794 5795 if not self._next or self._next.token_type != TokenType.L_PAREN: 5796 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5797 self._advance() 5798 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5799 5800 return None 5801 5802 if any_token: 5803 if token_type in self.RESERVED_TOKENS: 5804 return None 5805 elif 
token_type not in self.FUNC_TOKENS: 5806 return None 5807 5808 self._advance(2) 5809 5810 parser = self.FUNCTION_PARSERS.get(upper) 5811 if parser and not anonymous: 5812 this = parser(self) 5813 else: 5814 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5815 5816 if subquery_predicate: 5817 expr = None 5818 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5819 expr = self._parse_select() 5820 self._match_r_paren() 5821 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5822 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5823 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5824 self._advance(-1) 5825 expr = self._parse_bitwise() 5826 5827 if expr: 5828 return self.expression(subquery_predicate, comments=comments, this=expr) 5829 5830 if functions is None: 5831 functions = self.FUNCTIONS 5832 5833 function = functions.get(upper) 5834 known_function = function and not anonymous 5835 5836 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5837 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5838 5839 post_func_comments = self._curr and self._curr.comments 5840 if known_function and post_func_comments: 5841 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5842 # call we'll construct it as exp.Anonymous, even if it's "known" 5843 if any( 5844 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5845 for comment in post_func_comments 5846 ): 5847 known_function = False 5848 5849 if alias and known_function: 5850 args = self._kv_to_prop_eq(args) 5851 5852 if known_function: 5853 func_builder = t.cast(t.Callable, function) 5854 5855 if "dialect" in func_builder.__code__.co_varnames: 5856 func = func_builder(args, dialect=self.dialect) 5857 else: 5858 func = func_builder(args) 5859 5860 func = self.validate_expression(func, args) 5861 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5862 func.meta["name"] = 
this 5863 5864 this = func 5865 else: 5866 if token_type == TokenType.IDENTIFIER: 5867 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5868 5869 this = self.expression(exp.Anonymous, this=this, expressions=args) 5870 this = this.update_positions(token) 5871 5872 if isinstance(this, exp.Expression): 5873 this.add_comments(comments) 5874 5875 self._match_r_paren(this) 5876 return self._parse_window(this) 5877 5878 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5879 return expression 5880 5881 def _kv_to_prop_eq( 5882 self, expressions: t.List[exp.Expression], parse_map: bool = False 5883 ) -> t.List[exp.Expression]: 5884 transformed = [] 5885 5886 for index, e in enumerate(expressions): 5887 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5888 if isinstance(e, exp.Alias): 5889 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5890 5891 if not isinstance(e, exp.PropertyEQ): 5892 e = self.expression( 5893 exp.PropertyEQ, 5894 this=e.this if parse_map else exp.to_identifier(e.this.name), 5895 expression=e.expression, 5896 ) 5897 5898 if isinstance(e.this, exp.Column): 5899 e.this.replace(e.this.this) 5900 else: 5901 e = self._to_prop_eq(e, index) 5902 5903 transformed.append(e) 5904 5905 return transformed 5906 5907 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5908 return self._parse_statement() 5909 5910 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5911 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5912 5913 def _parse_user_defined_function( 5914 self, kind: t.Optional[TokenType] = None 5915 ) -> t.Optional[exp.Expression]: 5916 this = self._parse_table_parts(schema=True) 5917 5918 if not self._match(TokenType.L_PAREN): 5919 return this 5920 5921 expressions = self._parse_csv(self._parse_function_parameter) 5922 self._match_r_paren() 5923 return self.expression( 5924 exp.UserDefinedFunction, 
this=this, expressions=expressions, wrapped=True 5925 ) 5926 5927 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5928 literal = self._parse_primary() 5929 if literal: 5930 return self.expression(exp.Introducer, this=token.text, expression=literal) 5931 5932 return self._identifier_expression(token) 5933 5934 def _parse_session_parameter(self) -> exp.SessionParameter: 5935 kind = None 5936 this = self._parse_id_var() or self._parse_primary() 5937 5938 if this and self._match(TokenType.DOT): 5939 kind = this.name 5940 this = self._parse_var() or self._parse_primary() 5941 5942 return self.expression(exp.SessionParameter, this=this, kind=kind) 5943 5944 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5945 return self._parse_id_var() 5946 5947 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5948 index = self._index 5949 5950 if self._match(TokenType.L_PAREN): 5951 expressions = t.cast( 5952 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5953 ) 5954 5955 if not self._match(TokenType.R_PAREN): 5956 self._retreat(index) 5957 else: 5958 expressions = [self._parse_lambda_arg()] 5959 5960 if self._match_set(self.LAMBDAS): 5961 return self.LAMBDAS[self._prev.token_type](self, expressions) 5962 5963 self._retreat(index) 5964 5965 this: t.Optional[exp.Expression] 5966 5967 if self._match(TokenType.DISTINCT): 5968 this = self.expression( 5969 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5970 ) 5971 else: 5972 this = self._parse_select_or_expression(alias=alias) 5973 5974 return self._parse_limit( 5975 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5976 ) 5977 5978 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5979 index = self._index 5980 if not self._match(TokenType.L_PAREN): 5981 return this 5982 5983 # Disambiguate between schema and subquery/CTE, e.g. 
def _parse_column_def(
    self, this: t.Optional[exp.Expression], computed_column: bool = True
) -> t.Optional[exp.Expression]:
    """
    Parse the remainder of a column definition whose name has already been parsed.

    Args:
        this: The already-parsed column name. An `exp.Column` is unwrapped to the
            node it wraps.
        computed_column: When false, an ALIAS token directly after the name is
            consumed and skipped before the type is parsed.

    Returns:
        An `exp.ColumnDef`, or `this` unchanged when neither a type nor any
        constraint follows the name.
    """
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    if not computed_column:
        self._match(TokenType.ALIAS)

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    # Computed column introduced by an ALIAS token (only when no type was parsed)
    # or by the literal words ALIAS / MATERIALIZED.
    if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    ):
        # `_prev` is whichever introducer token was just consumed
        persisted = self._prev.text.upper() == "MATERIALIZED"
        constraint_kind = exp.ComputedColumnConstraint(
            this=self._parse_assignment(),
            persisted=persisted or self._match_text_seq("PERSISTED"),
            not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
        )
        constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
    # Typed column followed by an ALIAS token. The token is only peeked at first:
    # when WRAPPED_TRANSFORM_COLUMN_CONSTRAINT is set, it is consumed only if the
    # token after it is an opening parenthesis.
    elif (
        kind
        and self._match(TokenType.ALIAS, advance=False)
        and (
            not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
            or (self._next and self._next.token_type == TokenType.L_PAREN)
        )
    ):
        self._advance()
        constraints.append(
            self.expression(
                exp.ColumnConstraint,
                kind=exp.ComputedColumnConstraint(
                    this=self._parse_disjunction(),
                    # STORED -> persisted; VIRTUAL is consumed but not persisted
                    persisted=self._match_texts(("STORED", "VIRTUAL"))
                    and self._prev.text.upper() == "STORED",
                ),
            )
        )

    # Collect any number of trailing column constraints.
    while True:
        constraint = self._parse_column_constraint()
        if not constraint:
            break
        constraints.append(constraint)

    # A bare name with nothing after it is not a column definition.
    if not kind and not constraints:
        return this

    return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """Parse the operand of an inline-length constraint, skipping an optional LENGTH keyword."""
    self._match_text_seq("LENGTH")  # optional keyword; the result is intentionally ignored
    length = self._parse_bitwise()
    return self.expression(exp.InlineLengthColumnConstraint, this=length)
def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parse one column constraint, optionally named via `CONSTRAINT <name>`.

    Returns:
        An `exp.ColumnConstraint` when the next token's text is a key of
        `CONSTRAINT_PARSERS`; otherwise whatever was parsed for the optional
        `CONSTRAINT <name>` prefix (falsy when there was none).
    """
    name = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

    # `WITH <procedure option>` is not a constraint, so peek without consuming.
    is_procedure_option = False
    if self._match(TokenType.WITH, advance=False):
        upcoming = self._next
        if upcoming and upcoming.text.upper() in self.PROCEDURE_OPTIONS:
            is_procedure_option = True

    if is_procedure_option or not self._match_texts(self.CONSTRAINT_PARSERS):
        return name

    parse_kind = self.CONSTRAINT_PARSERS[self._prev.text.upper()]
    return self.expression(exp.ColumnConstraint, this=name, kind=parse_kind(self))
def _parse_key_constraint_options(self) -> t.List[str]:
    """
    Collect the options trailing a key constraint as plain strings.

    Two shapes are recognized and may be mixed freely:
      * `ON <token> <action>`, rendered as `"ON <token> <action>"`, where the
        action is NO ACTION, CASCADE, RESTRICT, SET NULL or SET DEFAULT;
      * any option listed in `KEY_CONSTRAINT_OPTIONS`, rendered by its name.

    Parsing stops at the end of input or at the first token matching neither shape.
    """
    collected: t.List[str] = []

    while self._curr:
        if not self._match(TokenType.ON):
            option = self._parse_var_from_options(
                self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
            )
            if not option:
                break
            collected.append(option.name)
            continue

        # The token after ON is taken verbatim, whatever it is.
        target = self._advance_any() and self._prev.text

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match_text_seq("CASCADE"):
            action = "CASCADE"
        elif self._match_text_seq("RESTRICT"):
            action = "RESTRICT"
        elif self._match_pair(TokenType.SET, TokenType.NULL):
            action = "SET NULL"
        elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
            action = "SET DEFAULT"
        else:
            action = None
            self.raise_error("Invalid key constraint")

        collected.append(f"ON {target} {action}")

    return collected
def _parse_foreign_key(self) -> exp.ForeignKey:
    """
    Parse the body of a FOREIGN KEY constraint.

    Grammar handled here:
        [(<column>, ...)] REFERENCES ... [ON {DELETE | UPDATE} <action>]... [<options>]

    Each `ON DELETE` / `ON UPDATE` clause is stored on the resulting node under the
    lower-cased keyword (`delete=` / `update=`) with the action as an upper-cased
    string.

    Raises:
        ParseError: via `raise_error`, when `ON` is not followed by DELETE or
            UPDATE, or when `SET` is not followed by NULL or DEFAULT.
    """
    expressions = (
        self._parse_wrapped_id_vars()
        if not self._match(TokenType.REFERENCES, advance=False)
        else None
    )
    reference = self._parse_references()
    on_options: t.Dict[str, str] = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            # The match result used to be ignored: with anything other than
            # NULL / DEFAULT after SET, `_prev` stayed on the SET token and the
            # action silently became the bogus "SET SET".
            if not self._match_set((TokenType.NULL, TokenType.DEFAULT)):
                self.raise_error("Expected NULL or DEFAULT after SET")
            action = "SET " + self._prev.text.upper()
        else:
            # Single-word action (e.g. CASCADE, RESTRICT): take the token verbatim.
            self._advance()
            action = self._prev.text.upper()

        on_options[kind] = action

    return self.expression(
        exp.ForeignKey,
        expressions=expressions,
        reference=reference,
        options=self._parse_key_constraint_options(),
        **on_options,  # type: ignore
    )
def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    """
    Parse a `[...]` or `{...}` construct that follows `this` (if any).

    Depending on the bracket kind and on `this`, the result is an ODBC datetime
    literal, an `exp.Struct` (braces), an array built by `build_array_constructor`
    (no preceding expression, or a name listed in `ARRAY_CONSTRUCTORS`), or an
    `exp.Bracket` subscript on `this`. The method then calls itself on the result,
    so chained brackets such as `x[1][2]` are consumed in one call.

    Args:
        this: The expression the bracket is attached to, or None for a standalone
            literal.

    Returns:
        `this` unchanged when the next token is not an opening bracket/brace,
        otherwise the parsed expression.
    """
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
        # The opening bracket has just been consumed, so index - 2 is presumably
        # the token right before it; `MAP {...}` / `MAP [...]` switches key handling.
        map_token = seq_get(self._tokens, self._index - 2)
        parse_map = map_token is not None and map_token.text.upper() == "MAP"
    else:
        parse_map = False

    bracket_kind = self._prev.token_type
    # `{d '...'}`-style ODBC literal: a brace followed by a known marker variable.
    if (
        bracket_kind == TokenType.L_BRACE
        and self._curr
        and self._curr.token_type == TokenType.VAR
        and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
    ):
        return self._parse_odbc_datetime_literal()

    expressions = self._parse_csv(
        lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
    )

    # The closing token must match the kind of the opening one.
    if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
        self.raise_error("Expected ]")
    elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
        self.raise_error("Expected }")

    # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
    if bracket_kind == TokenType.L_BRACE:
        this = self.expression(
            exp.Struct,
            expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
        )
    elif not this:
        # Standalone `[...]`: an array literal.
        this = build_array_constructor(
            exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
        )
    else:
        # `<name>[...]` where <name> is a registered array constructor.
        constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
        if constructor_type:
            return build_array_constructor(
                constructor_type,
                args=expressions,
                bracket_kind=bracket_kind,
                dialect=self.dialect,
            )

        # Plain subscript: shift the indices by the negated dialect INDEX_OFFSET.
        expressions = apply_index_offset(
            this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
        )
        this = self.expression(
            exp.Bracket,
            this=this,
            expressions=expressions,
            # move the comments from the subscripted expression onto the Bracket node
            comments=this.pop_comments(),
        )

    self._add_comments(this)
    # Recurse to pick up any further brackets that follow.
    return self._parse_bracket(this)
def _parse_if(self) -> t.Optional[exp.Expression]:
    """
    Parse what follows an already-consumed IF token.

    Two shapes are supported:
      * function form `IF(<cond>, <true>[, <false>])`;
      * statement form `IF <cond> [THEN] <true> [ELSE <false>] [END]`.

    Returns:
        The `exp.If` node; a command expression when `NO_PAREN_IF_COMMANDS` is set
        and the unparenthesized IF sits at token index 0; or None (after rewinding
        to the IF token) when no condition could be parsed.
    """
    if self._match(TokenType.L_PAREN):

        def _parse_argument() -> t.Optional[exp.Expression]:
            return self._parse_alias(self._parse_assignment(), explicit=True)

        call_args = self._parse_csv(_parse_argument)
        if_call = self.validate_expression(exp.If.from_arg_list(call_args), call_args)
        self._match_r_paren()
        return if_call

    # Position of the IF token itself, which was consumed by the caller.
    if_index = self._index - 1

    if self.NO_PAREN_IF_COMMANDS and if_index == 0:
        return self._parse_as_command(self._prev)

    condition = self._parse_assignment()
    if not condition:
        self._retreat(if_index)
        return None

    self._match(TokenType.THEN)
    when_true = self._parse_assignment()

    when_false = None
    if self._match(TokenType.ELSE):
        when_false = self._parse_assignment()

    self._match(TokenType.END)
    return self.expression(exp.If, this=condition, true=when_true, false=when_false)
def _parse_gap_fill(self) -> exp.GapFill:
    """
    Parse the arguments of a gap-fill call: `[TABLE] <table> [,] <arg>, ...`.

    The table comes first in the argument list; the remaining arguments are parsed
    with `_parse_lambda`. The result is validated before it is returned.
    """
    self._match(TokenType.TABLE)
    arguments = [self._parse_table()]

    self._match(TokenType.COMMA)
    arguments.extend(self._parse_csv(self._parse_lambda))

    return self.validate_expression(exp.GapFill.from_arg_list(arguments), arguments)
def _parse_string_agg(self) -> exp.GroupConcat:
    """
    Parse the arguments of STRING_AGG / LISTAGG-style aggregates into `exp.GroupConcat`.

    Handles `[DISTINCT] <expr> [, <separator>]`, an optional
    `ON OVERFLOW {ERROR | TRUNCATE ...}` clause, an ORDER BY / LIMIT placed inside
    the parentheses, and a trailing `WITHIN GROUP (ORDER BY ...)`.

    A parsed ON OVERFLOW clause is attached to the result on every return path.
    It used to be kept only when WITHIN GROUP followed and was silently dropped
    otherwise.
    """
    if self._match(TokenType.DISTINCT):
        args: t.List[t.Optional[exp.Expression]] = [
            self.expression(exp.Distinct, expressions=[self._parse_assignment()])
        ]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_assignment))
    else:
        args = self._parse_csv(self._parse_assignment)  # type: ignore

    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior,
                this=self._parse_string(),
                # WITH COUNT unless WITHOUT COUNT is spelled out
                with_count=(
                    self._match_text_seq("WITH", "COUNT")
                    or not self._match_text_seq("WITHOUT", "COUNT")
                ),
            )
    else:
        on_overflow = None

    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
        return self.expression(
            exp.GroupConcat,
            this=args[0],
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if not self._match_text_seq("WITHIN", "GROUP"):
        # Give the closing paren back; the caller consumes it.
        self._retreat(index)
        group_concat = self.validate_expression(exp.GroupConcat.from_arg_list(args), args)
        if on_overflow is not None:
            group_concat.set("on_overflow", on_overflow)
        return group_concat

    # The corresponding match_r_paren will be called in parse_function (caller)
    self._match_l_paren()

    return self.expression(
        exp.GroupConcat,
        this=self._parse_order(this=seq_get(args, 0)),
        separator=seq_get(args, 1),
        on_overflow=on_overflow,
    )
def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
    """
    Parse a comma-separated list of XML namespace declarations.

    Each entry is either `DEFAULT <string>` or `<string> [AS <alias>]`. At least
    one entry is always parsed, so the returned list is never empty.
    """

    def _parse_entry() -> exp.XMLNamespace:
        if self._match(TokenType.DEFAULT):
            uri = self._parse_string()
        else:
            uri = self._parse_alias(self._parse_string())
        return self.expression(exp.XMLNamespace, this=uri)

    parsed = [_parse_entry()]
    while self._match(TokenType.COMMA):
        parsed.append(_parse_entry())

    return parsed
def _parse_on_handling(
    self, on: str, *values: str
) -> t.Optional[str] | t.Optional[exp.Expression]:
    """
    Parse the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL).

    Args:
        on: The keyword expected after ON (the "Y" part).
        *values: The candidate keywords for the "X" part, tried in order.

    Returns:
        The string `"<value> ON <on>"` for the first candidate that matches, the
        parsed default expression for the DEFAULT form, or None when neither form
        is present (the token position is then restored).
    """
    matched = next(
        (candidate for candidate in values if self._match_text_seq(candidate, "ON", on)),
        None,
    )
    if matched is not None:
        return f"{matched} ON {on}"

    start = self._index
    if self._match(TokenType.DEFAULT):
        fallback = self._parse_bitwise()
        if self._match_text_seq("ON", on):
            return fallback

    # DEFAULT was absent or was not followed by ON <on>: undo whatever was consumed.
    self._retreat(start)
    return None
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """
    Parse one column entry of a JSON table schema.

    An entry is either `<name> <type> [PATH <string>]` or
    `NESTED [PATH <string>] <schema>`; in the nested form the name and type are
    left unset and the nested schema is parsed with `_parse_json_schema`.
    """
    is_nested = self._match_text_seq("NESTED")

    if is_nested:
        name = None
        kind = None
    else:
        name = self._parse_id_var()
        kind = self._parse_types(allow_identifiers=False)

    path = self._match_text_seq("PATH") and self._parse_string()
    nested_schema = self._parse_json_schema() if is_nested else None

    return self.expression(
        exp.JSONColumnDef, this=name, kind=kind, path=path, nested_schema=nested_schema
    )
def _parse_match_against(self) -> exp.MatchAgainst:
    """
    Parse `MATCH (<columns>) AGAINST (<string> [<modifier>])` after the opening paren.

    The recognized modifiers are IN NATURAL LANGUAGE MODE (optionally followed by
    WITH QUERY EXPANSION), IN BOOLEAN MODE and WITH QUERY EXPANSION; the one found
    is stored as a plain string, or None when absent.
    """
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    modifier = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION"
        else:
            modifier = "IN NATURAL LANGUAGE MODE"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """
    Parse the arguments of a POSITION-like function into `exp.StrPosition`.

    Supports `POSITION(<needle> IN <haystack>)` as well as the comma form, whose
    optional third argument is the start position.

    Args:
        haystack_first: In the comma form, whether the searched string comes before
            the substring. Ignored for the IN form.
    """
    arguments = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        return self.expression(
            exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(arguments, 0)
        )

    haystack_index, needle_index = (0, 1) if haystack_first else (1, 0)

    return self.expression(
        exp.StrPosition,
        this=seq_get(arguments, haystack_index),
        substr=seq_get(arguments, needle_index),
        position=seq_get(arguments, 2),
    )
def _parse_window(
    self, this: t.Optional[exp.Expression], alias: bool = False
) -> t.Optional[exp.Expression]:
    """Parse the clauses that may trail a function call, ending with a window.

    In order, this handles an optional ``WITHIN GROUP (...)``, an optional
    ``FILTER (...)``, an optional ``IGNORE | RESPECT NULLS`` and finally the
    window itself (either a bare window name or a parenthesised definition).

    Args:
        this: The expression the window applies to (typically a function call).
        alias: When True the caller is parsing a named window definition: no
            introducer token from `WINDOW_BEFORE_PAREN_TOKENS` is required, an
            optional ALIAS token is consumed instead and `over` is left as None.

    Returns:
        `this` (possibly wrapped in WithinGroup / Filter / IgnoreNulls /
        RespectNulls) when no window introducer follows, otherwise an
        `exp.Window` node.
    """
    # Keep a handle on the original node so its comments can be moved onto the window.
    func = this
    comments = func.comments if isinstance(func, exp.Expression) else None

    # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
    if self._match_text_seq("WITHIN", "GROUP"):
        order = self._parse_wrapped(self._parse_order)
        this = self.expression(exp.WithinGroup, this=this, expression=order)

    if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
        # The WHERE keyword is consumed here if present, so _parse_where is told to skip it.
        self._match(TokenType.WHERE)
        this = self.expression(
            exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
        )
        self._match_r_paren()

    # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
    # Some dialects choose to implement and some do not.
    # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

    # There is some code above in _parse_lambda that handles
    # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

    # The below changes handle
    # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

    # Oracle allows both formats
    # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
    # and Snowflake chose to do the same for familiarity
    # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
    if isinstance(this, exp.AggFunc):
        ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

        # Hoist a nested IGNORE/RESPECT NULLS node so that it wraps the aggregate itself.
        if ignore_respect and ignore_respect is not this:
            ignore_respect.replace(ignore_respect.this)
            this = self.expression(ignore_respect.__class__, this=this)

    this = self._parse_respect_or_ignore_nulls(this)

    # bigquery select from window x AS (partition by ...)
    if alias:
        over = None
        self._match(TokenType.ALIAS)
    elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
        # No window introducer follows: return what has been built so far.
        return this
    else:
        # Remember which introducer keyword was used, upper-cased.
        over = self._prev.text.upper()

    # The comments captured above are attached to the Window node instead of the function.
    if comments and isinstance(func, exp.Expression):
        func.pop_comments()

    # No parenthesis: the window is referenced by name only.
    if not self._match(TokenType.L_PAREN):
        return self.expression(
            exp.Window,
            comments=comments,
            this=this,
            alias=self._parse_id_var(False),
            over=over,
        )

    window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

    # `first` holds the result of matching FIRST; a following LAST forces it to False.
    first = self._match(TokenType.FIRST)
    if self._match_text_seq("LAST"):
        first = False

    partition, order = self._parse_partition_and_order()
    # `kind` is the text of the ROWS/RANGE token, or a falsy value when neither is present.
    kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

    if kind:
        # BETWEEN and AND are both optional here; each bound is parsed by _parse_window_spec.
        self._match(TokenType.BETWEEN)
        start = self._parse_window_spec()
        self._match(TokenType.AND)
        end = self._parse_window_spec()
        exclude = (
            self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
            if self._match_text_seq("EXCLUDE")
            else None
        )

        spec = self.expression(
            exp.WindowSpec,
            kind=kind,
            start=start["value"],
            start_side=start["side"],
            end=end["value"],
            end_side=end["side"],
            exclude=exclude,
        )
    else:
        spec = None

    self._match_r_paren()

    window = self.expression(
        exp.Window,
        comments=comments,
        this=this,
        partition_by=partition,
        order=order,
        spec=spec,
        alias=window_alias,
        over=over,
        first=first,
    )

    # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
    # The check does not consume the token; the recursive call re-matches it.
    if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
        return self._parse_window(window, alias=alias)

    return window
def _parse_id_var(
    self,
    any_token: bool = True,
    tokens: t.Optional[t.Collection[TokenType]] = None,
) -> t.Optional[exp.Expression]:
    """Parse an identifier, falling back to treating another token as one.

    Args:
        any_token: When True, any token accepted by `_advance_any` may be
            consumed and turned into an identifier.
        tokens: Token types accepted as identifiers when the `any_token`
            route did not consume anything; defaults to `ID_VAR_TOKENS`.

    Returns:
        The parsed identifier expression, or the falsy result of
        `_parse_identifier` when nothing could be consumed.
    """
    identifier = self._parse_identifier()
    if identifier:
        return identifier

    consumed = (any_token and self._advance_any()) or self._match_set(
        tokens or self.ID_VAR_TOKENS
    )
    if not consumed:
        return identifier

    # A consumed string token yields a quoted identifier.
    return self._identifier_expression(quoted=self._prev.token_type == TokenType.STRING)
ignore_reserved: bool = False) -> t.Optional[Token]: 7203 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7204 self._advance() 7205 return self._prev 7206 return None 7207 7208 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7209 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7210 7211 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7212 return self._parse_primary() or self._parse_var(any_token=True) 7213 7214 def _parse_null(self) -> t.Optional[exp.Expression]: 7215 if self._match_set(self.NULL_TOKENS): 7216 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7217 return self._parse_placeholder() 7218 7219 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7220 if self._match(TokenType.TRUE): 7221 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7222 if self._match(TokenType.FALSE): 7223 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7224 return self._parse_placeholder() 7225 7226 def _parse_star(self) -> t.Optional[exp.Expression]: 7227 if self._match(TokenType.STAR): 7228 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7229 return self._parse_placeholder() 7230 7231 def _parse_parameter(self) -> exp.Parameter: 7232 this = self._parse_identifier() or self._parse_primary_or_var() 7233 return self.expression(exp.Parameter, this=this) 7234 7235 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7236 if self._match_set(self.PLACEHOLDER_PARSERS): 7237 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7238 if placeholder: 7239 return placeholder 7240 self._advance(-1) 7241 return None 7242 7243 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7244 if not self._match_texts(keywords): 7245 return None 7246 if self._match(TokenType.L_PAREN, advance=False): 7247 return self._parse_wrapped_csv(self._parse_expression) 7248 7249 expression = 
self._parse_expression() 7250 return [expression] if expression else None 7251 7252 def _parse_csv( 7253 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7254 ) -> t.List[exp.Expression]: 7255 parse_result = parse_method() 7256 items = [parse_result] if parse_result is not None else [] 7257 7258 while self._match(sep): 7259 self._add_comments(parse_result) 7260 parse_result = parse_method() 7261 if parse_result is not None: 7262 items.append(parse_result) 7263 7264 return items 7265 7266 def _parse_tokens( 7267 self, parse_method: t.Callable, expressions: t.Dict 7268 ) -> t.Optional[exp.Expression]: 7269 this = parse_method() 7270 7271 while self._match_set(expressions): 7272 this = self.expression( 7273 expressions[self._prev.token_type], 7274 this=this, 7275 comments=self._prev_comments, 7276 expression=parse_method(), 7277 ) 7278 7279 return this 7280 7281 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7282 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7283 7284 def _parse_wrapped_csv( 7285 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7286 ) -> t.List[exp.Expression]: 7287 return self._parse_wrapped( 7288 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7289 ) 7290 7291 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7292 wrapped = self._match(TokenType.L_PAREN) 7293 if not wrapped and not optional: 7294 self.raise_error("Expecting (") 7295 parse_result = parse_method() 7296 if wrapped: 7297 self._match_r_paren() 7298 return parse_result 7299 7300 def _parse_expressions(self) -> t.List[exp.Expression]: 7301 return self._parse_csv(self._parse_expression) 7302 7303 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7304 return self._parse_select() or self._parse_set_operations( 7305 self._parse_alias(self._parse_assignment(), explicit=True) 7306 if alias 
def _parse_transaction(self) -> exp.Transaction | exp.Command:
    """Parse the remainder of a transaction-start statement.

    An optional transaction kind (one of `TRANSACTION_KIND`) and an optional
    ``TRANSACTION`` / ``WORK`` keyword are consumed, followed by a
    comma-separated list of modes, each made of one or more VAR tokens.

    Returns:
        An `exp.Transaction` whose `this` is the kind text (or None) and whose
        `modes` is the list of space-joined mode strings.
    """
    kind = self._prev.text if self._match_texts(self.TRANSACTION_KIND) else None

    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    more = True
    while more:
        # Collect the consecutive words that make up a single mode.
        words = []
        while self._match(TokenType.VAR):
            words.append(self._prev.text)

        if words:
            modes.append(" ".join(words))

        # Another mode follows only if a comma separates it.
        more = bool(self._match(TokenType.COMMA))

    return self.expression(exp.Transaction, this=kind, modes=modes)
def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    """Parse a ``DROP ...`` action, defaulting its kind to ``COLUMN``.

    Returns:
        The falsy match result when the current token is not DROP; otherwise
        whatever `_parse_drop` returns. A non-Command result gets
        ``kind="COLUMN"`` when it has no "kind" argument key.
    """
    matched = self._match(TokenType.DROP)
    if not matched:
        return matched

    drop = self._parse_drop()
    if not drop or isinstance(drop, exp.Command):
        return drop

    # Keep an existing kind; only fill in COLUMN when the key is absent.
    drop.set("kind", drop.args.get("kind", "COLUMN"))
    return drop
def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
    """Parse the action following the ``ALTER`` keyword of an ALTER statement.

    A keyword registered in `ALTER_ALTER_PARSERS` is dispatched to its parser.
    Otherwise the input is read as ``ALTER [COLUMN] <name> <change>`` and an
    `exp.AlterColumn` describing the change is returned.
    """
    if self._match_texts(self.ALTER_ALTER_PARSERS):
        keyword = self._prev.text.upper()
        return self.ALTER_ALTER_PARSERS[keyword](self)

    # Many dialects support the ALTER [COLUMN] syntax, so if there is no
    # keyword after ALTER we default to parsing this statement
    self._match(TokenType.COLUMN)
    target = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=target, drop=True)

    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        default = self._parse_assignment()
        return self.expression(exp.AlterColumn, this=target, default=default)

    if self._match(TokenType.COMMENT):
        comment = self._parse_string()
        return self.expression(exp.AlterColumn, this=target, comment=comment)

    if self._match_text_seq("DROP", "NOT", "NULL"):
        return self.expression(
            exp.AlterColumn,
            this=target,
            drop=True,
            allow_null=True,
        )

    if self._match_text_seq("SET", "NOT", "NULL"):
        return self.expression(
            exp.AlterColumn,
            this=target,
            allow_null=False,
        )

    for visibility in ("VISIBLE", "INVISIBLE"):
        if self._match_text_seq("SET", visibility):
            return self.expression(exp.AlterColumn, this=target, visible=visibility)

    # Anything else is read as a type change: [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
    self._match_text_seq("SET", "DATA")
    self._match_text_seq("TYPE")
    dtype = self._parse_types()
    collate = self._match(TokenType.COLLATE) and self._parse_term()
    using = self._match(TokenType.USING) and self._parse_assignment()

    return self.expression(
        exp.AlterColumn,
        this=target,
        dtype=dtype,
        collate=collate,
        using=using,
    )
def _parse_alter(self) -> exp.Alter | exp.Command:
    """Parse an ``ALTER`` statement.

    Returns:
        An `exp.Alter` when the altered object kind is one of `ALTERABLES`,
        the action keyword has an entry in `ALTER_PARSERS`, that parser
        produced at least one action and no tokens remain. In every other
        case the statement is returned via `_parse_as_command`.
    """
    start = self._prev

    alter_token = self._prev if self._match_set(self.ALTERABLES) else None
    if not alter_token:
        return self._parse_as_command(start)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")
    this = self._parse_table(schema=True)
    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    # Step onto the action keyword only if something follows it.
    if self._next:
        self._advance()

    action_parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if not action_parser:
        return self._parse_as_command(start)

    actions = ensure_list(action_parser(self))
    not_valid = self._match_text_seq("NOT", "VALID")
    options = self._parse_csv(self._parse_property)

    # Leftover tokens or an empty action list mean the statement was not understood.
    if self._curr or not actions:
        return self._parse_as_command(start)

    return self.expression(
        exp.Alter,
        this=this,
        kind=alter_token.text.upper(),
        exists=exists,
        actions=actions,
        only=only,
        options=options,
        cluster=cluster,
        not_valid=not_valid,
    )
# https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
    """Parse ``<kind> [DELTA] STATISTICS [NOSCAN | FOR ... | SAMPLE ...]``.

    The keyword that selected this parser (the previous token) becomes
    `kind`. An optional ``DELTA`` becomes `option`.

    Returns:
        An `exp.AnalyzeStatistics` where `this` is one of "NOSCAN",
        "FOR ALL COLUMNS", "FOR COLUMNS" or None, and `expressions` holds
        either the listed column references (``FOR COLUMNS a, b``) or a
        single `exp.AnalyzeSample` (``SAMPLE n [PERCENT]``).

    Errors:
        Reports "Expecting token STATISTICS" through `raise_error` when the
        STATISTICS keyword is missing.
    """
    this = None
    kind = self._prev.text.upper()
    option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
    expressions = []

    if not self._match_text_seq("STATISTICS"):
        self.raise_error("Expecting token STATISTICS")

    if self._match_text_seq("NOSCAN"):
        this = "NOSCAN"
    elif self._match(TokenType.FOR):
        if self._match_text_seq("ALL", "COLUMNS"):
            this = "FOR ALL COLUMNS"
        # The keyword is matched exactly. Passing the bare string "COLUMNS" to
        # _match_texts (which takes a collection of texts everywhere else)
        # would make the check a substring test, so e.g. "COLUMN" would be
        # accepted. `elif` keeps this branch from overriding FOR ALL COLUMNS.
        elif self._match_text_seq("COLUMNS"):
            this = "FOR COLUMNS"
            expressions = self._parse_csv(self._parse_column_reference)
    elif self._match_text_seq("SAMPLE"):
        sample = self._parse_number()
        expressions = [
            self.expression(
                exp.AnalyzeSample,
                sample=sample,
                # When PERCENT follows the number, the token text is recorded as the sample kind.
                kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
            )
        ]

    return self.expression(
        exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
    )
# https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
    """Parse the histogram clause of an ANALYZE statement.

    The keyword that selected this parser (the previous token) is stored
    upper-cased as `this`. Two continuations are recognised:

    * ``HISTOGRAM ON <columns> [WITH ...]... [MANUAL | AUTO UPDATE]``
    * ``USING DATA <string>``

    Returns:
        An `exp.AnalyzeHistogram` carrying the column references, an optional
        `exp.AnalyzeWith` / `exp.UsingData` expression and the optional
        update option text.
    """
    this = self._prev.text.upper()
    expression: t.Optional[exp.Expression] = None
    expressions = []
    update_options = None

    if self._match_text_seq("HISTOGRAM", "ON"):
        expressions = self._parse_csv(self._parse_column_reference)
        # Each WITH clause contributes at most one rendered string, e.g. "SYNC MODE" or "<n> BUCKETS".
        with_expressions = []
        while self._match(TokenType.WITH):
            # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
            if self._match_texts(("SYNC", "ASYNC")):
                # MODE is only peeked at (advance=False); presumably _prev is then
                # still the SYNC/ASYNC token, whose text is recorded before MODE
                # is consumed by the explicit _advance() -- confirm against _match_text_seq.
                if self._match_text_seq("MODE", advance=False):
                    with_expressions.append(f"{self._prev.text.upper()} MODE")
                    self._advance()
            else:
                buckets = self._parse_number()
                if self._match_text_seq("BUCKETS"):
                    with_expressions.append(f"{buckets} BUCKETS")
        if with_expressions:
            expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

        # Same peek-then-advance approach: UPDATE is peeked so that _prev can
        # supply the MANUAL/AUTO text, then UPDATE is consumed.
        if self._match_texts(("MANUAL", "AUTO")) and self._match(
            TokenType.UPDATE, advance=False
        ):
            update_options = self._prev.text.upper()
            self._advance()
    elif self._match_text_seq("USING", "DATA"):
        expression = self.expression(exp.UsingData, this=self._parse_string())

    return self.expression(
        exp.AnalyzeHistogram,
        this=this,
        expressions=expressions,
        expression=expression,
        update_options=update_options,
    )
whens=self._parse_when_matched(), 7797 returning=self._parse_returning(), 7798 ) 7799 7800 def _parse_when_matched(self) -> exp.Whens: 7801 whens = [] 7802 7803 while self._match(TokenType.WHEN): 7804 matched = not self._match(TokenType.NOT) 7805 self._match_text_seq("MATCHED") 7806 source = ( 7807 False 7808 if self._match_text_seq("BY", "TARGET") 7809 else self._match_text_seq("BY", "SOURCE") 7810 ) 7811 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7812 7813 self._match(TokenType.THEN) 7814 7815 if self._match(TokenType.INSERT): 7816 this = self._parse_star() 7817 if this: 7818 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7819 else: 7820 then = self.expression( 7821 exp.Insert, 7822 this=exp.var("ROW") 7823 if self._match_text_seq("ROW") 7824 else self._parse_value(values=False), 7825 expression=self._match_text_seq("VALUES") and self._parse_value(), 7826 ) 7827 elif self._match(TokenType.UPDATE): 7828 expressions = self._parse_star() 7829 if expressions: 7830 then = self.expression(exp.Update, expressions=expressions) 7831 else: 7832 then = self.expression( 7833 exp.Update, 7834 expressions=self._match(TokenType.SET) 7835 and self._parse_csv(self._parse_equality), 7836 ) 7837 elif self._match(TokenType.DELETE): 7838 then = self.expression(exp.Var, this=self._prev.text) 7839 else: 7840 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7841 7842 whens.append( 7843 self.expression( 7844 exp.When, 7845 matched=matched, 7846 source=source, 7847 condition=condition, 7848 then=then, 7849 ) 7850 ) 7851 return self.expression(exp.Whens, expressions=whens) 7852 7853 def _parse_show(self) -> t.Optional[exp.Expression]: 7854 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7855 if parser: 7856 return parser(self) 7857 return self._parse_as_command(self._prev) 7858 7859 def _parse_set_item_assignment( 7860 self, kind: t.Optional[str] = None 7861 ) -> t.Optional[exp.Expression]: 7862 index = 
self._index 7863 7864 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7865 return self._parse_set_transaction(global_=kind == "GLOBAL") 7866 7867 left = self._parse_primary() or self._parse_column() 7868 assignment_delimiter = self._match_texts(("=", "TO")) 7869 7870 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7871 self._retreat(index) 7872 return None 7873 7874 right = self._parse_statement() or self._parse_id_var() 7875 if isinstance(right, (exp.Column, exp.Identifier)): 7876 right = exp.var(right.name) 7877 7878 this = self.expression(exp.EQ, this=left, expression=right) 7879 return self.expression(exp.SetItem, this=this, kind=kind) 7880 7881 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7882 self._match_text_seq("TRANSACTION") 7883 characteristics = self._parse_csv( 7884 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7885 ) 7886 return self.expression( 7887 exp.SetItem, 7888 expressions=characteristics, 7889 kind="TRANSACTION", 7890 **{"global": global_}, # type: ignore 7891 ) 7892 7893 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7894 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7895 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7896 7897 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7898 index = self._index 7899 set_ = self.expression( 7900 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7901 ) 7902 7903 if self._curr: 7904 self._retreat(index) 7905 return self._parse_as_command(self._prev) 7906 7907 return set_ 7908 7909 def _parse_var_from_options( 7910 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7911 ) -> t.Optional[exp.Var]: 7912 start = self._curr 7913 if not start: 7914 return None 7915 7916 option = start.text.upper() 7917 continuations = options.get(option) 7918 7919 index = self._index 
7920 self._advance() 7921 for keywords in continuations or []: 7922 if isinstance(keywords, str): 7923 keywords = (keywords,) 7924 7925 if self._match_text_seq(*keywords): 7926 option = f"{option} {' '.join(keywords)}" 7927 break 7928 else: 7929 if continuations or continuations is None: 7930 if raise_unmatched: 7931 self.raise_error(f"Unknown option {option}") 7932 7933 self._retreat(index) 7934 return None 7935 7936 return exp.var(option) 7937 7938 def _parse_as_command(self, start: Token) -> exp.Command: 7939 while self._curr: 7940 self._advance() 7941 text = self._find_sql(start, self._prev) 7942 size = len(start.text) 7943 self._warn_unsupported() 7944 return exp.Command(this=text[:size], expression=text[size:]) 7945 7946 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7947 settings = [] 7948 7949 self._match_l_paren() 7950 kind = self._parse_id_var() 7951 7952 if self._match(TokenType.L_PAREN): 7953 while True: 7954 key = self._parse_id_var() 7955 value = self._parse_primary() 7956 if not key and value is None: 7957 break 7958 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7959 self._match(TokenType.R_PAREN) 7960 7961 self._match_r_paren() 7962 7963 return self.expression( 7964 exp.DictProperty, 7965 this=this, 7966 kind=kind.this if kind else None, 7967 settings=settings, 7968 ) 7969 7970 def _parse_dict_range(self, this: str) -> exp.DictRange: 7971 self._match_l_paren() 7972 has_min = self._match_text_seq("MIN") 7973 if has_min: 7974 min = self._parse_var() or self._parse_primary() 7975 self._match_text_seq("MAX") 7976 max = self._parse_var() or self._parse_primary() 7977 else: 7978 max = self._parse_var() or self._parse_primary() 7979 min = exp.Literal.number(0) 7980 self._match_r_paren() 7981 return self.expression(exp.DictRange, this=this, min=min, max=max) 7982 7983 def _parse_comprehension( 7984 self, this: t.Optional[exp.Expression] 7985 ) -> t.Optional[exp.Comprehension]: 7986 index = self._index 7987 
expression = self._parse_column() 7988 if not self._match(TokenType.IN): 7989 self._retreat(index - 1) 7990 return None 7991 iterator = self._parse_column() 7992 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7993 return self.expression( 7994 exp.Comprehension, 7995 this=this, 7996 expression=expression, 7997 iterator=iterator, 7998 condition=condition, 7999 ) 8000 8001 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8002 if self._match(TokenType.HEREDOC_STRING): 8003 return self.expression(exp.Heredoc, this=self._prev.text) 8004 8005 if not self._match_text_seq("$"): 8006 return None 8007 8008 tags = ["$"] 8009 tag_text = None 8010 8011 if self._is_connected(): 8012 self._advance() 8013 tags.append(self._prev.text.upper()) 8014 else: 8015 self.raise_error("No closing $ found") 8016 8017 if tags[-1] != "$": 8018 if self._is_connected() and self._match_text_seq("$"): 8019 tag_text = tags[-1] 8020 tags.append("$") 8021 else: 8022 self.raise_error("No closing $ found") 8023 8024 heredoc_start = self._curr 8025 8026 while self._curr: 8027 if self._match_text_seq(*tags, advance=False): 8028 this = self._find_sql(heredoc_start, self._prev) 8029 self._advance(len(tags)) 8030 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8031 8032 self._advance() 8033 8034 self.raise_error(f"No closing {''.join(tags)} found") 8035 return None 8036 8037 def _find_parser( 8038 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8039 ) -> t.Optional[t.Callable]: 8040 if not self._curr: 8041 return None 8042 8043 index = self._index 8044 this = [] 8045 while True: 8046 # The current token might be multiple words 8047 curr = self._curr.text.upper() 8048 key = curr.split(" ") 8049 this.append(curr) 8050 8051 self._advance() 8052 result, trie = in_trie(trie, key) 8053 if result == TrieResult.FAILED: 8054 break 8055 8056 if result == TrieResult.EXISTS: 8057 subparser = parsers[" ".join(this)] 8058 return subparser 8059 8060 self._retreat(index) 
8061 return None 8062 8063 def _match(self, token_type, advance=True, expression=None): 8064 if not self._curr: 8065 return None 8066 8067 if self._curr.token_type == token_type: 8068 if advance: 8069 self._advance() 8070 self._add_comments(expression) 8071 return True 8072 8073 return None 8074 8075 def _match_set(self, types, advance=True): 8076 if not self._curr: 8077 return None 8078 8079 if self._curr.token_type in types: 8080 if advance: 8081 self._advance() 8082 return True 8083 8084 return None 8085 8086 def _match_pair(self, token_type_a, token_type_b, advance=True): 8087 if not self._curr or not self._next: 8088 return None 8089 8090 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8091 if advance: 8092 self._advance(2) 8093 return True 8094 8095 return None 8096 8097 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8098 if not self._match(TokenType.L_PAREN, expression=expression): 8099 self.raise_error("Expecting (") 8100 8101 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8102 if not self._match(TokenType.R_PAREN, expression=expression): 8103 self.raise_error("Expecting )") 8104 8105 def _match_texts(self, texts, advance=True): 8106 if ( 8107 self._curr 8108 and self._curr.token_type != TokenType.STRING 8109 and self._curr.text.upper() in texts 8110 ): 8111 if advance: 8112 self._advance() 8113 return True 8114 return None 8115 8116 def _match_text_seq(self, *texts, advance=True): 8117 index = self._index 8118 for text in texts: 8119 if ( 8120 self._curr 8121 and self._curr.token_type != TokenType.STRING 8122 and self._curr.text.upper() == text 8123 ): 8124 self._advance() 8125 else: 8126 self._retreat(index) 8127 return None 8128 8129 if not advance: 8130 self._retreat(index) 8131 8132 return True 8133 8134 def _replace_lambda( 8135 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8136 ) -> t.Optional[exp.Expression]: 8137 if 
not node: 8138 return node 8139 8140 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8141 8142 for column in node.find_all(exp.Column): 8143 typ = lambda_types.get(column.parts[0].name) 8144 if typ is not None: 8145 dot_or_id = column.to_dot() if column.table else column.this 8146 8147 if typ: 8148 dot_or_id = self.expression( 8149 exp.Cast, 8150 this=dot_or_id, 8151 to=typ, 8152 ) 8153 8154 parent = column.parent 8155 8156 while isinstance(parent, exp.Dot): 8157 if not isinstance(parent.parent, exp.Dot): 8158 parent.replace(dot_or_id) 8159 break 8160 parent = parent.parent 8161 else: 8162 if column is node: 8163 node = dot_or_id 8164 else: 8165 column.replace(dot_or_id) 8166 return node 8167 8168 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8169 start = self._prev 8170 8171 # Not to be confused with TRUNCATE(number, decimals) function call 8172 if self._match(TokenType.L_PAREN): 8173 self._retreat(self._index - 2) 8174 return self._parse_function() 8175 8176 # Clickhouse supports TRUNCATE DATABASE as well 8177 is_database = self._match(TokenType.DATABASE) 8178 8179 self._match(TokenType.TABLE) 8180 8181 exists = self._parse_exists(not_=False) 8182 8183 expressions = self._parse_csv( 8184 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8185 ) 8186 8187 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8188 8189 if self._match_text_seq("RESTART", "IDENTITY"): 8190 identity = "RESTART" 8191 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8192 identity = "CONTINUE" 8193 else: 8194 identity = None 8195 8196 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8197 option = self._prev.text 8198 else: 8199 option = None 8200 8201 partition = self._parse_partition() 8202 8203 # Fallback case 8204 if self._curr: 8205 return self._parse_as_command(start) 8206 8207 return self.expression( 8208 exp.TruncateTable, 8209 expressions=expressions, 8210 
is_database=is_database, 8211 exists=exists, 8212 cluster=cluster, 8213 identity=identity, 8214 option=option, 8215 partition=partition, 8216 ) 8217 8218 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8219 this = self._parse_ordered(self._parse_opclass) 8220 8221 if not self._match(TokenType.WITH): 8222 return this 8223 8224 op = self._parse_var(any_token=True) 8225 8226 return self.expression(exp.WithOperator, this=this, op=op) 8227 8228 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8229 self._match(TokenType.EQ) 8230 self._match(TokenType.L_PAREN) 8231 8232 opts: t.List[t.Optional[exp.Expression]] = [] 8233 option: exp.Expression | None 8234 while self._curr and not self._match(TokenType.R_PAREN): 8235 if self._match_text_seq("FORMAT_NAME", "="): 8236 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8237 option = self._parse_format_name() 8238 else: 8239 option = self._parse_property() 8240 8241 if option is None: 8242 self.raise_error("Unable to parse option") 8243 break 8244 8245 opts.append(option) 8246 8247 return opts 8248 8249 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8250 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8251 8252 options = [] 8253 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8254 option = self._parse_var(any_token=True) 8255 prev = self._prev.text.upper() 8256 8257 # Different dialects might separate options and values by white space, "=" and "AS" 8258 self._match(TokenType.EQ) 8259 self._match(TokenType.ALIAS) 8260 8261 param = self.expression(exp.CopyParameter, this=option) 8262 8263 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8264 TokenType.L_PAREN, advance=False 8265 ): 8266 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8267 param.set("expressions", self._parse_wrapped_options()) 8268 elif prev == "FILE_FORMAT": 8269 # T-SQL's external file format case 8270 
param.set("expression", self._parse_field()) 8271 else: 8272 param.set("expression", self._parse_unquoted_field()) 8273 8274 options.append(param) 8275 self._match(sep) 8276 8277 return options 8278 8279 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8280 expr = self.expression(exp.Credentials) 8281 8282 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8283 expr.set("storage", self._parse_field()) 8284 if self._match_text_seq("CREDENTIALS"): 8285 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8286 creds = ( 8287 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8288 ) 8289 expr.set("credentials", creds) 8290 if self._match_text_seq("ENCRYPTION"): 8291 expr.set("encryption", self._parse_wrapped_options()) 8292 if self._match_text_seq("IAM_ROLE"): 8293 expr.set("iam_role", self._parse_field()) 8294 if self._match_text_seq("REGION"): 8295 expr.set("region", self._parse_field()) 8296 8297 return expr 8298 8299 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8300 return self._parse_field() 8301 8302 def _parse_copy(self) -> exp.Copy | exp.Command: 8303 start = self._prev 8304 8305 self._match(TokenType.INTO) 8306 8307 this = ( 8308 self._parse_select(nested=True, parse_subquery_alias=False) 8309 if self._match(TokenType.L_PAREN, advance=False) 8310 else self._parse_table(schema=True) 8311 ) 8312 8313 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8314 8315 files = self._parse_csv(self._parse_file_location) 8316 credentials = self._parse_credentials() 8317 8318 self._match_text_seq("WITH") 8319 8320 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8321 8322 # Fallback case 8323 if self._curr: 8324 return self._parse_as_command(start) 8325 8326 return self.expression( 8327 exp.Copy, 8328 this=this, 8329 kind=kind, 8330 credentials=credentials, 8331 files=files, 8332 params=params, 8333 ) 8334 8335 def _parse_normalize(self) -> 
exp.Normalize: 8336 return self.expression( 8337 exp.Normalize, 8338 this=self._parse_bitwise(), 8339 form=self._match(TokenType.COMMA) and self._parse_var(), 8340 ) 8341 8342 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8343 args = self._parse_csv(lambda: self._parse_lambda()) 8344 8345 this = seq_get(args, 0) 8346 decimals = seq_get(args, 1) 8347 8348 return expr_type( 8349 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8350 ) 8351 8352 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8353 star_token = self._prev 8354 8355 if self._match_text_seq("COLUMNS", "(", advance=False): 8356 this = self._parse_function() 8357 if isinstance(this, exp.Columns): 8358 this.set("unpack", True) 8359 return this 8360 8361 return self.expression( 8362 exp.Star, 8363 **{ # type: ignore 8364 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8365 "replace": self._parse_star_op("REPLACE"), 8366 "rename": self._parse_star_op("RENAME"), 8367 }, 8368 ).update_positions(star_token) 8369 8370 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8371 privilege_parts = [] 8372 8373 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8374 # (end of privilege list) or L_PAREN (start of column list) are met 8375 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8376 privilege_parts.append(self._curr.text.upper()) 8377 self._advance() 8378 8379 this = exp.var(" ".join(privilege_parts)) 8380 expressions = ( 8381 self._parse_wrapped_csv(self._parse_column) 8382 if self._match(TokenType.L_PAREN, advance=False) 8383 else None 8384 ) 8385 8386 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8387 8388 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8389 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8390 principal = self._parse_id_var() 8391 8392 if not principal: 8393 
return None 8394 8395 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8396 8397 def _parse_grant(self) -> exp.Grant | exp.Command: 8398 start = self._prev 8399 8400 privileges = self._parse_csv(self._parse_grant_privilege) 8401 8402 self._match(TokenType.ON) 8403 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8404 8405 # Attempt to parse the securable e.g. MySQL allows names 8406 # such as "foo.*", "*.*" which are not easily parseable yet 8407 securable = self._try_parse(self._parse_table_parts) 8408 8409 if not securable or not self._match_text_seq("TO"): 8410 return self._parse_as_command(start) 8411 8412 principals = self._parse_csv(self._parse_grant_principal) 8413 8414 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8415 8416 if self._curr: 8417 return self._parse_as_command(start) 8418 8419 return self.expression( 8420 exp.Grant, 8421 privileges=privileges, 8422 kind=kind, 8423 securable=securable, 8424 principals=principals, 8425 grant_option=grant_option, 8426 ) 8427 8428 def _parse_overlay(self) -> exp.Overlay: 8429 return self.expression( 8430 exp.Overlay, 8431 **{ # type: ignore 8432 "this": self._parse_bitwise(), 8433 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8434 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8435 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8436 }, 8437 ) 8438 8439 def _parse_format_name(self) -> exp.Property: 8440 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8441 # for FILE_FORMAT = <format_name> 8442 return self.expression( 8443 exp.Property, 8444 this=exp.var("FORMAT_NAME"), 8445 value=self._parse_string() or self._parse_table_parts(), 8446 ) 8447 8448 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8449 args: t.List[exp.Expression] = [] 8450 8451 if self._match(TokenType.DISTINCT): 8452 args.append(self.expression(exp.Distinct, 
expressions=[self._parse_assignment()])) 8453 self._match(TokenType.COMMA) 8454 8455 args.extend(self._parse_csv(self._parse_assignment)) 8456 8457 return self.expression( 8458 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8459 ) 8460 8461 def _identifier_expression( 8462 self, token: t.Optional[Token] = None, **kwargs: t.Any 8463 ) -> exp.Identifier: 8464 token = token or self._prev 8465 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8466 expression.update_positions(token) 8467 return expression 8468 8469 def _build_pipe_cte( 8470 self, 8471 query: exp.Query, 8472 expressions: t.List[exp.Expression], 8473 alias_cte: t.Optional[exp.TableAlias] = None, 8474 ) -> exp.Select: 8475 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8476 if alias_cte: 8477 new_cte = alias_cte 8478 else: 8479 self._pipe_cte_counter += 1 8480 new_cte = f"__tmp{self._pipe_cte_counter}" 8481 8482 with_ = query.args.get("with") 8483 ctes = with_.pop() if with_ else None 8484 8485 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8486 if ctes: 8487 new_select.set("with", ctes) 8488 8489 return new_select.with_(new_cte, as_=query, copy=False) 8490 8491 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8492 select = self._parse_select(consume_pipe=False) 8493 if not select: 8494 return query 8495 8496 return self._build_pipe_cte( 8497 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8498 ) 8499 8500 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8501 limit = self._parse_limit() 8502 offset = self._parse_offset() 8503 if limit: 8504 curr_limit = query.args.get("limit", limit) 8505 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8506 query.limit(limit, copy=False) 8507 if offset: 8508 curr_offset = query.args.get("offset") 8509 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8510 
query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8511 8512 return query 8513 8514 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8515 this = self._parse_assignment() 8516 if self._match_text_seq("GROUP", "AND", advance=False): 8517 return this 8518 8519 this = self._parse_alias(this) 8520 8521 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8522 return self._parse_ordered(lambda: this) 8523 8524 return this 8525 8526 def _parse_pipe_syntax_aggregate_group_order_by( 8527 self, query: exp.Select, group_by_exists: bool = True 8528 ) -> exp.Select: 8529 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8530 aggregates_or_groups, orders = [], [] 8531 for element in expr: 8532 if isinstance(element, exp.Ordered): 8533 this = element.this 8534 if isinstance(this, exp.Alias): 8535 element.set("this", this.args["alias"]) 8536 orders.append(element) 8537 else: 8538 this = element 8539 aggregates_or_groups.append(this) 8540 8541 if group_by_exists: 8542 query.select(*aggregates_or_groups, copy=False).group_by( 8543 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8544 copy=False, 8545 ) 8546 else: 8547 query.select(*aggregates_or_groups, append=False, copy=False) 8548 8549 if orders: 8550 return query.order_by(*orders, append=False, copy=False) 8551 8552 return query 8553 8554 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8555 self._match_text_seq("AGGREGATE") 8556 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8557 8558 if self._match(TokenType.GROUP_BY) or ( 8559 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8560 ): 8561 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8562 8563 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8564 8565 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> 
t.Optional[exp.Query]: 8566 first_setop = self.parse_set_operation(this=query) 8567 if not first_setop: 8568 return None 8569 8570 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8571 expr = self._parse_paren() 8572 return expr.assert_is(exp.Subquery).unnest() if expr else None 8573 8574 first_setop.this.pop() 8575 8576 setops = [ 8577 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8578 *self._parse_csv(_parse_and_unwrap_query), 8579 ] 8580 8581 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8582 with_ = query.args.get("with") 8583 ctes = with_.pop() if with_ else None 8584 8585 if isinstance(first_setop, exp.Union): 8586 query = query.union(*setops, copy=False, **first_setop.args) 8587 elif isinstance(first_setop, exp.Except): 8588 query = query.except_(*setops, copy=False, **first_setop.args) 8589 else: 8590 query = query.intersect(*setops, copy=False, **first_setop.args) 8591 8592 query.set("with", ctes) 8593 8594 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8595 8596 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8597 join = self._parse_join() 8598 if not join: 8599 return None 8600 8601 if isinstance(query, exp.Select): 8602 return query.join(join, copy=False) 8603 8604 return query 8605 8606 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8607 pivots = self._parse_pivots() 8608 if not pivots: 8609 return query 8610 8611 from_ = query.args.get("from") 8612 if from_: 8613 from_.this.set("pivots", pivots) 8614 8615 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8616 8617 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8618 self._match_text_seq("EXTEND") 8619 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8620 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8621 8622 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8623 
sample = self._parse_table_sample() 8624 8625 with_ = query.args.get("with") 8626 if with_: 8627 with_.expressions[-1].this.set("sample", sample) 8628 else: 8629 query.set("sample", sample) 8630 8631 return query 8632 8633 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8634 if isinstance(query, exp.Subquery): 8635 query = exp.select("*").from_(query, copy=False) 8636 8637 if not query.args.get("from"): 8638 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8639 8640 while self._match(TokenType.PIPE_GT): 8641 start = self._curr 8642 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8643 if not parser: 8644 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8645 # keywords, making it tricky to disambiguate them without lookahead. The approach 8646 # here is to try and parse a set operation and if that fails, then try to parse a 8647 # join operator. If that fails as well, then the operator is not supported. 
8648 parsed_query = self._parse_pipe_syntax_set_operator(query) 8649 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8650 if not parsed_query: 8651 self._retreat(start) 8652 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8653 break 8654 query = parsed_query 8655 else: 8656 query = parser(self, query) 8657 8658 return query 8659 8660 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8661 vars = self._parse_csv(self._parse_id_var) 8662 if not vars: 8663 return None 8664 8665 return self.expression( 8666 exp.DeclareItem, 8667 this=vars, 8668 kind=self._parse_types(), 8669 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8670 ) 8671 8672 def _parse_declare(self) -> exp.Declare | exp.Command: 8673 start = self._prev 8674 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8675 8676 if not expressions or self._curr: 8677 return self._parse_as_command(start) 8678 8679 return self.expression(exp.Declare, expressions=expressions) 8680 8681 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8682 exp_class = exp.Cast if strict else exp.TryCast 8683 8684 if exp_class == exp.TryCast: 8685 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8686 8687 return self.expression(exp_class, **kwargs)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Stores the error-handling configuration, resolves the dialect and resets the parser.

    Args:
        error_level: The desired error level. A falsy value selects ErrorLevel.IMMEDIATE.
        error_message_context: The amount of context to capture from a query string when
            displaying an error message (in number of characters).
        max_errors: Maximum number of error messages to include in a raised ParseError.
        dialect: The value handed to `Dialect.get_or_raise` to obtain the dialect.
    """
    # Function-scope import, kept local exactly as in the original implementation
    # (presumably to avoid a circular import at module load time -- confirm).
    from sqlglot.dialects import Dialect

    resolved_level = error_level if error_level else ErrorLevel.IMMEDIATE

    self.error_level = resolved_level
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)

    # Must come last: the configuration above is in place before the state is reset.
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Turns a token list into syntax trees, yielding one tree per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # Looked up on the class (not the instance), so `_parse` receives the plain function.
    statement_parser = self.__class__._parse_statement

    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced by the first expression type that parses
        successfully.

    Raises:
        TypeError: If one of the expression types has no entry in EXPRESSION_PARSERS.
        ParseError: If none of the expression types could be parsed. The error carries
            the merged errors of every failed attempt and is chained to the last one.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to before trying the next one
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # When no expression types were supplied the loop body never runs, so there is no
    # previous failure to chain from; indexing `errors[-1]` unconditionally would raise
    # an unrelated IndexError instead of the intended ParseError.
    cause = errors[-1] if errors else None

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced by parsing the tokens into the target Expression type.
def check_errors(self) -> None:
    """
    Reports the recorded errors according to the configured error level.

    With ErrorLevel.WARN every recorded error is logged. With ErrorLevel.RAISE a single
    ParseError summarising the recorded errors is raised, provided there is at least one.
    Any other level leaves the recorded errors untouched.
    """
    level = self.error_level

    if level == ErrorLevel.WARN:
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    # Nothing to do unless we are asked to raise and actually have something to raise
    if level != ErrorLevel.RAISE or not self.errors:
        return

    raise ParseError(
        concat_messages(self.errors, self.max_errors),
        errors=merge_errors(self.errors),
    )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Builds a ParseError for `message` and either raises it or records it.

    The error is raised right away when the error level is ErrorLevel.IMMEDIATE;
    otherwise it is appended to the list of recorded errors.

    Args:
        message: The description of the problem.
        token: The token the error refers to. When omitted, the current token is used,
            then the previous token, then an empty string token.
    """
    token = token or self._curr or self._prev or Token.string("")

    highlight_start = token.start
    highlight_end = token.end + 1  # token.end is inclusive, slicing is exclusive
    context_size = self.error_message_context

    # Slice the SQL into the text before the token, the token itself and the text after
    start_context = self.sql[max(highlight_start - context_size, 0) : highlight_start]
    highlight = self.sql[highlight_start:highlight_end]
    end_context = self.sql[highlight_end : highlight_end + context_size]

    headline = f"{message}. Line {token.line}, Col: {token.col}.\n"
    # \033[4m ... \033[0m wraps the offending text in ANSI underline / reset codes
    excerpt = f" {start_context}\033[4m{highlight}\033[0m{end_context}"

    error = ParseError.new(
        headline + excerpt,
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level != ErrorLevel.IMMEDIATE:
        self.errors.append(error)
        return

    raise error
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Instantiates `exp_class`, attaches comments to it and validates the result.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression. When it is
            empty or omitted, `_add_comments` is used to attach comments instead.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The new expression, after being passed through `validate_expression`.
    """
    node = exp_class(**kwargs)

    if comments:
        node.add_comments(comments)
    else:
        self._add_comments(node)

    return self.validate_expression(node)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Checks an Expression and reports every problem it finds through `raise_error`.

    The check is skipped entirely when the error level is `ErrorLevel.IGNORE`.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The same expression that was passed in.
    """
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    # Each message produced by the expression becomes one parser error.
    for problem in expression.error_messages(args):
        self.raise_error(problem)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
def parse_set_operation(
    self, this: t.Optional[exp.Expression], consume_pipe: bool = False
) -> t.Optional[exp.Expression]:
    """
    Parses a set operation (UNION, EXCEPT or INTERSECT) whose left operand is `this`.

    Args:
        this: The already parsed left-hand side of the set operation.
        consume_pipe: Forwarded unchanged to `_parse_select` for the right-hand side.

    Returns:
        The set operation expression, or None if the upcoming tokens are not in
        `SET_OPERATIONS`, in which case the parser is moved back to where it started.
    """
    rewind_to = self._index

    # Optional join-style side / kind tokens may precede the set operator.
    _, side_token, kind_token = self._parse_join_parts()

    side = kind = None
    if side_token:
        side = side_token.text
    if kind_token:
        kind = kind_token.text

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(rewind_to)
        return None

    # Capture the operator token now: later matches move `self._prev` forward.
    operator_token = self._prev
    operation: t.Type[exp.SetOperation] = {
        TokenType.UNION: exp.Union,
        TokenType.EXCEPT: exp.Except,
    }.get(operator_token.token_type, exp.Intersect)
    comments = operator_token.comments

    distinct: t.Optional[bool]
    if self._match(TokenType.DISTINCT):
        distinct = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        # No explicit quantifier: defer to the dialect's default for this operation.
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME")
    if not by_name:
        by_name = self._match_text_seq("STRICT", "CORRESPONDING")

    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not (side or kind):
            kind = "INNER"

    on_column_list = (
        self._parse_wrapped_csv(self._parse_column)
        if by_name and self._match_texts(("ON", "BY"))
        else None
    )

    right_hand_side = self._parse_select(
        nested=True, parse_set_operation=False, consume_pipe=consume_pipe
    )

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=right_hand_side,
        side=side,
        kind=kind,
        on=on_column_list,
    )