sqlglot.parser
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
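

# Editor's sketch (not part of the original source): the build_* helpers above
# normalize raw argument lists into canonical AST nodes at parse time. For
# example, the MOD wrapping documented in `build_mod` is observable through the
# public API, assuming the default dialect:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#     'SELECT (a + 1) % 7'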


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
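
    # Editor's note (a sketch, not original source): a Parser is usually
    # obtained through a Dialect, e.g. `Dialect.get_or_raise("duckdb").parser()`,
    # or implicitly via `sqlglot.parse` / `sqlglot.parse_one`. With
    # error_level=ErrorLevel.RAISE, at most `max_errors` messages are merged
    # into the single ParseError raised by `check_errors` (see below).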

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
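
    # Editor's note (sketch): FUNCTIONS maps uppercase function names to
    # builders that override the generic `from_arg_list` constructors, which is
    # where argument reordering (GLOB, LOCATE) and canonicalization (LOG2/LOG10
    # into exp.Log with an explicit base) happen. Assuming the default dialect:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT LOG2(x)").sql()
    #     'SELECT LOG(2, x)'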

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS
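
    # Editor's note: the set algebra above drives disambiguation. Keywords like
    # LEFT, RIGHT, NATURAL and SEMI can serve as identifiers in general, but
    # they are excluded from TABLE_ALIAS_TOKENS so that e.g. "FROM a LEFT JOIN b"
    # parses LEFT as a join side rather than as an alias of "a".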

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
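
    # Editor's sketch: LAMBDAS parses higher-order function arguments, e.g.
    # "x -> x + 1" becomes exp.Lambda (with `_replace_lambda` rebinding column
    # references to the lambda's parameters), while "name => value" becomes an
    # exp.Kwarg keyword argument.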

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
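
    # Editor's sketch: COLUMN_OPERATORS handles postfix column syntax, e.g.
    # "col::INT" yields exp.Cast (exp.TryCast when STRICT_CAST is False) and
    # "doc -> '$.x'" yields exp.JSONExtract with a dialect-normalized JSON path.
    # Assuming the default dialect:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT a::INT").sql()
    #     'SELECT CAST(a AS INT)'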

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }
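
    # Editor's note: the literal parsers above build nodes straight from token
    # text: a STRING token becomes exp.Literal(this=..., is_string=True), a
    # NUMBER token the same node with is_string=False, and PRIMARY_PARSERS adds
    # NULL, TRUE/FALSE, session parameters and star projections on top.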

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
"TRANSFORM": lambda self: self.expression( 1032 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1033 ), 1034 "TTL": lambda self: self._parse_ttl(), 1035 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1036 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1037 "VOLATILE": lambda self: self._parse_volatile_property(), 1038 "WITH": lambda self: self._parse_with_property(), 1039 } 1040 1041 CONSTRAINT_PARSERS = { 1042 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1043 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1044 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1045 "CHARACTER SET": lambda self: self.expression( 1046 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1047 ), 1048 "CHECK": lambda self: self.expression( 1049 exp.CheckColumnConstraint, 1050 this=self._parse_wrapped(self._parse_assignment), 1051 enforced=self._match_text_seq("ENFORCED"), 1052 ), 1053 "COLLATE": lambda self: self.expression( 1054 exp.CollateColumnConstraint, 1055 this=self._parse_identifier() or self._parse_column(), 1056 ), 1057 "COMMENT": lambda self: self.expression( 1058 exp.CommentColumnConstraint, this=self._parse_string() 1059 ), 1060 "COMPRESS": lambda self: self._parse_compress(), 1061 "CLUSTERED": lambda self: self.expression( 1062 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1063 ), 1064 "NONCLUSTERED": lambda self: self.expression( 1065 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1066 ), 1067 "DEFAULT": lambda self: self.expression( 1068 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1069 ), 1070 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1071 "EPHEMERAL": lambda self: self.expression( 1072 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1073 ), 1074 "EXCLUDE": lambda self: self.expression( 1075 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1076 ), 1077 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1078 "FORMAT": lambda self: self.expression( 1079 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1080 ), 1081 "GENERATED": lambda self: self._parse_generated_as_identity(), 1082 "IDENTITY": lambda self: self._parse_auto_increment(), 1083 "INLINE": lambda self: self._parse_inline(), 1084 "LIKE": lambda self: self._parse_create_like(), 1085 "NOT": lambda self: self._parse_not_constraint(), 1086 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1087 "ON": lambda self: ( 1088 self._match(TokenType.UPDATE) 1089 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1090 ) 1091 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1092 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1093 "PERIOD": lambda self: self._parse_period_for_system_time(), 1094 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1095 "REFERENCES": lambda self: self._parse_references(match=False), 1096 "TITLE": lambda self: self.expression( 1097 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1098 ), 1099 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1100 "UNIQUE": lambda self: self._parse_unique(), 1101 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1102 "WATERMARK": lambda self: 
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their
            # arguments are in the right order:
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized into the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
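
    # Editor's sketch of the canonicalization above, with hypothetical inputs:
    #
    #     PARTITIONED BY (bucket(5, c))  -- Hive-style, bucket count first
    #     PARTITIONED BY (bucket(c, 5))  -- Trino-style, column first
    #
    # both produce exp.PartitionedByBucket(this=c, expression=5).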

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax)
            for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin)
            for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
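
    # Editor's note: each QUERY_MODIFIER_PARSERS entry returns a
    # (modifier_key, expression) pair; `_parse_query_modifiers` stores the
    # parsed node under that key on the query expression, which is why both
    # LIMIT and FETCH map to the "limit" slot.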

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
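
    # Editor's sketch (assuming the public sqlglot API): driving `parse`
    # manually with a dialect-owned tokenizer and parser:
    #
    #     >>> from sqlglot.dialects.dialect import Dialect
    #     >>> dialect = Dialect.get_or_raise("duckdb")
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> parser = dialect.parser()
    #     >>> [e.sql() for e in parser.parse(dialect.tokenize(sql), sql)]
    #     ['SELECT 1', 'SELECT 2']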

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
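
    # Editor's note: `raise_error` slices `error_message_context` characters of
    # SQL on each side of the offending token and underlines the token itself
    # with ANSI escape codes, so the message pinpoints the line and column that
    # failed to parse.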
1679 """ 1680 instance = exp_class(**kwargs) 1681 instance.add_comments(comments) if comments else self._add_comments(instance) 1682 return self.validate_expression(instance) 1683 1684 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1685 if expression and self._prev_comments: 1686 expression.add_comments(self._prev_comments) 1687 self._prev_comments = None 1688 1689 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1690 """ 1691 Validates an Expression, making sure that all its mandatory arguments are set. 1692 1693 Args: 1694 expression: The expression to validate. 1695 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1696 1697 Returns: 1698 The validated expression. 1699 """ 1700 if self.error_level != ErrorLevel.IGNORE: 1701 for error_message in expression.error_messages(args): 1702 self.raise_error(error_message) 1703 1704 return expression 1705 1706 def _find_sql(self, start: Token, end: Token) -> str: 1707 return self.sql[start.start : end.end + 1] 1708 1709 def _is_connected(self) -> bool: 1710 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1711 1712 def _advance(self, times: int = 1) -> None: 1713 self._index += times 1714 self._curr = seq_get(self._tokens, self._index) 1715 self._next = seq_get(self._tokens, self._index + 1) 1716 1717 if self._index > 0: 1718 self._prev = self._tokens[self._index - 1] 1719 self._prev_comments = self._prev.comments 1720 else: 1721 self._prev = None 1722 self._prev_comments = None 1723 1724 def _retreat(self, index: int) -> None: 1725 if index != self._index: 1726 self._advance(index - self._index) 1727 1728 def _warn_unsupported(self) -> None: 1729 if len(self._tokens) <= 1: 1730 return 1731 1732 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1733 # interested in emitting a warning for the one being currently processed. 1734 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1735 1736 logger.warning( 1737 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1738 ) 1739 1740 def _parse_command(self) -> exp.Command: 1741 self._warn_unsupported() 1742 return self.expression( 1743 exp.Command, 1744 comments=self._prev_comments, 1745 this=self._prev.text.upper(), 1746 expression=self._parse_string(), 1747 ) 1748 1749 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1750 """ 1751 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1752        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1753        solve this by setting & resetting the parser state accordingly.
1754        """
1755        index = self._index
1756        error_level = self.error_level
1757
1758        self.error_level = ErrorLevel.IMMEDIATE
1759        try:
1760            this = parse_method()
1761        except ParseError:
1762            this = None
1763        finally:
1764            if not this or retreat:
1765                self._retreat(index)
1766            self.error_level = error_level
1767
1768        return this
1769
1770    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1771        start = self._prev
1772        exists = self._parse_exists() if allow_exists else None
1773
1774        self._match(TokenType.ON)
1775
1776        materialized = self._match_text_seq("MATERIALIZED")
1777        kind = self._match_set(self.CREATABLES) and self._prev
1778        if not kind:
1779            return self._parse_as_command(start)
1780
1781        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1782            this = self._parse_user_defined_function(kind=kind.token_type)
1783        elif kind.token_type == TokenType.TABLE:
1784            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1785        elif kind.token_type == TokenType.COLUMN:
1786            this = self._parse_column()
1787        else:
1788            this = self._parse_id_var()
1789
1790        self._match(TokenType.IS)
1791
1792        return self.expression(
1793            exp.Comment,
1794            this=this,
1795            kind=kind.text,
1796            expression=self._parse_string(),
1797            exists=exists,
1798            materialized=materialized,
1799        )
1800
1801    def _parse_to_table(
1802        self,
1803    ) -> exp.ToTableProperty:
1804        table = self._parse_table_parts(schema=True)
1805        return self.expression(exp.ToTableProperty, this=table)
1806
1807    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1808    def _parse_ttl(self) -> exp.Expression:
1809        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1810            this = self._parse_bitwise()
1811
1812            if self._match_text_seq("DELETE"):
1813                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1814            if self._match_text_seq("RECOMPRESS"):
1815                return self.expression(
1816                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1817                )
1818            if self._match_text_seq("TO", "DISK"):
1819                return self.expression(
1820                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1821                )
1822            if self._match_text_seq("TO", "VOLUME"):
1823                return self.expression(
1824                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1825                )
1826
1827            return this
1828
1829        expressions = self._parse_csv(_parse_ttl_action)
1830        where = self._parse_where()
1831        group = self._parse_group()
1832
1833        aggregates = None
1834        if group and self._match(TokenType.SET):
1835            aggregates = self._parse_csv(self._parse_set_item)
1836
1837        return self.expression(
1838            exp.MergeTreeTTL,
1839            expressions=expressions,
1840            where=where,
1841            group=group,
1842            aggregates=aggregates,
1843        )
1844
1845    def _parse_statement(self) -> t.Optional[exp.Expression]:
1846        if self._curr is None:
1847            return None
1848
1849        if self._match_set(self.STATEMENT_PARSERS):
1850            comments = self._prev_comments
1851            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1852            stmt.add_comments(comments, prepend=True)
1853            return stmt
1854
1855        if self._match_set(self.dialect.tokenizer.COMMANDS):
1856            return self._parse_command()
1857
1858        expression = self._parse_expression()
1859        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1860        return self._parse_query_modifiers(expression)
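
    # Illustrative sketch (not part of the original source): how the dispatch above
    # surfaces through the public API. A recognized leading statement token routes
    # through STATEMENT_PARSERS, while anything else falls back to expression/SELECT
    # parsing. Assuming the default dialect:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   assert isinstance(sqlglot.parse_one("CREATE TABLE t (c INT)"), exp.Create)
    #   assert isinstance(sqlglot.parse_one("1 + 2"), exp.Add)  # plain expression path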
1861 1862 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1863 start = self._prev 1864 temporary = self._match(TokenType.TEMPORARY) 1865 materialized = self._match_text_seq("MATERIALIZED") 1866 1867 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1868 if not kind: 1869 return self._parse_as_command(start) 1870 1871 concurrently = self._match_text_seq("CONCURRENTLY") 1872 if_exists = exists or self._parse_exists() 1873 1874 if kind == "COLUMN": 1875 this = self._parse_column() 1876 else: 1877 this = self._parse_table_parts( 1878 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1879 ) 1880 1881 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1882 1883 if self._match(TokenType.L_PAREN, advance=False): 1884 expressions = self._parse_wrapped_csv(self._parse_types) 1885 else: 1886 expressions = None 1887 1888 return self.expression( 1889 exp.Drop, 1890 exists=if_exists, 1891 this=this, 1892 expressions=expressions, 1893 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1894 temporary=temporary, 1895 materialized=materialized, 1896 cascade=self._match_text_seq("CASCADE"), 1897 constraints=self._match_text_seq("CONSTRAINTS"), 1898 purge=self._match_text_seq("PURGE"), 1899 cluster=cluster, 1900 concurrently=concurrently, 1901 ) 1902 1903 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1904 return ( 1905 self._match_text_seq("IF") 1906 and (not not_ or self._match(TokenType.NOT)) 1907 and self._match(TokenType.EXISTS) 1908 ) 1909 1910 def _parse_create(self) -> exp.Create | exp.Command: 1911 # Note: this can't be None because we've matched a statement parser 1912 start = self._prev 1913 1914 replace = ( 1915 start.token_type == TokenType.REPLACE 1916 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1917 or self._match_pair(TokenType.OR, TokenType.ALTER) 1918 ) 1919 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1920 1921 unique = self._match(TokenType.UNIQUE) 1922 1923 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1924 clustered = True 1925 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1926 "COLUMNSTORE" 1927 ): 1928 clustered = False 1929 else: 1930 clustered = None 1931 1932 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1933 self._advance() 1934 1935 properties = None 1936 create_token = self._match_set(self.CREATABLES) and self._prev 1937 1938 if not create_token: 1939 # exp.Properties.Location.POST_CREATE 1940 properties = self._parse_properties() 1941 create_token = self._match_set(self.CREATABLES) and self._prev 1942 1943 if not properties or not create_token: 1944 return self._parse_as_command(start) 1945 1946 concurrently = self._match_text_seq("CONCURRENTLY") 1947 exists = self._parse_exists(not_=True) 1948 this = None 1949 expression: t.Optional[exp.Expression] = None 1950 indexes = None 1951 no_schema_binding = None 1952 begin = None 1953 end = None 1954 clone = None 1955 1956 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1957 nonlocal properties 1958 if properties and temp_props: 1959 properties.expressions.extend(temp_props.expressions) 1960 elif temp_props: 1961 properties = temp_props 1962 1963 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1964 this = self._parse_user_defined_function(kind=create_token.token_type) 1965 1966 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1967 
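            # (Illustrative note, not in the original source: e.g. in
            # "CREATE FUNCTION f() RETURNS INT DETERMINISTIC AS ...", both RETURNS
            # and DETERMINISTIC would be collected by the property parse below.)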
            extend_props(self._parse_properties())
1968
1969            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
1970            extend_props(self._parse_properties())
1971
1972            if not expression:
1973                if self._match(TokenType.COMMAND):
1974                    expression = self._parse_as_command(self._prev)
1975                else:
1976                    begin = self._match(TokenType.BEGIN)
1977                    return_ = self._match_text_seq("RETURN")
1978
1979                    if self._match(TokenType.STRING, advance=False):
1980                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
1981                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
1982                        expression = self._parse_string()
1983                        extend_props(self._parse_properties())
1984                    else:
1985                        expression = self._parse_user_defined_function_expression()
1986
1987                    end = self._match_text_seq("END")
1988
1989                    if return_:
1990                        expression = self.expression(exp.Return, this=expression)
1991        elif create_token.token_type == TokenType.INDEX:
1992            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
1993            if not self._match(TokenType.ON):
1994                index = self._parse_id_var()
1995                anonymous = False
1996            else:
1997                index = None
1998                anonymous = True
1999
2000            this = self._parse_index(index=index, anonymous=anonymous)
2001        elif create_token.token_type in self.DB_CREATABLES:
2002            table_parts = self._parse_table_parts(
2003                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
2004            )
2005
2006            # exp.Properties.Location.POST_NAME
2007            self._match(TokenType.COMMA)
2008            extend_props(self._parse_properties(before=True))
2009
2010            this = self._parse_schema(this=table_parts)
2011
2012            # exp.Properties.Location.POST_SCHEMA and POST_WITH
2013            extend_props(self._parse_properties())
2014
2015            has_alias = self._match(TokenType.ALIAS)
2016            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
2017                # exp.Properties.Location.POST_ALIAS
2018                extend_props(self._parse_properties())
2019
2020            if create_token.token_type == TokenType.SEQUENCE:
2021                expression = self._parse_types()
2022                extend_props(self._parse_properties())
2023            else:
2024                expression = self._parse_ddl_select()
2025
2026            # Some dialects also support using a table as an alias instead of a SELECT.
2027            # Here we fall back to this as an alternative.
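            # (Illustrative, not in the original source: e.g. Teradata-style
            # "CREATE TABLE t2 AS t1 WITH DATA", where t1 parses as an exp.Table.)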
2028 if not expression and has_alias: 2029 expression = self._try_parse(self._parse_table_parts) 2030 2031 if create_token.token_type == TokenType.TABLE: 2032 # exp.Properties.Location.POST_EXPRESSION 2033 extend_props(self._parse_properties()) 2034 2035 indexes = [] 2036 while True: 2037 index = self._parse_index() 2038 2039 # exp.Properties.Location.POST_INDEX 2040 extend_props(self._parse_properties()) 2041 if not index: 2042 break 2043 else: 2044 self._match(TokenType.COMMA) 2045 indexes.append(index) 2046 elif create_token.token_type == TokenType.VIEW: 2047 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2048 no_schema_binding = True 2049 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2050 extend_props(self._parse_properties()) 2051 2052 shallow = self._match_text_seq("SHALLOW") 2053 2054 if self._match_texts(self.CLONE_KEYWORDS): 2055 copy = self._prev.text.lower() == "copy" 2056 clone = self.expression( 2057 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2058 ) 2059 2060 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2061 return self._parse_as_command(start) 2062 2063 create_kind_text = create_token.text.upper() 2064 return self.expression( 2065 exp.Create, 2066 this=this, 2067 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2068 replace=replace, 2069 refresh=refresh, 2070 unique=unique, 2071 expression=expression, 2072 exists=exists, 2073 properties=properties, 2074 indexes=indexes, 2075 no_schema_binding=no_schema_binding, 2076 begin=begin, 2077 end=end, 2078 clone=clone, 2079 concurrently=concurrently, 2080 clustered=clustered, 2081 ) 2082 2083 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2084 seq = exp.SequenceProperties() 2085 2086 options = [] 2087 index = self._index 2088 2089 while self._curr: 2090 self._match(TokenType.COMMA) 2091 if self._match_text_seq("INCREMENT"): 2092 self._match_text_seq("BY") 2093 self._match_text_seq("=") 2094 seq.set("increment", self._parse_term()) 2095 elif self._match_text_seq("MINVALUE"): 2096 seq.set("minvalue", self._parse_term()) 2097 elif self._match_text_seq("MAXVALUE"): 2098 seq.set("maxvalue", self._parse_term()) 2099 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2100 self._match_text_seq("=") 2101 seq.set("start", self._parse_term()) 2102 elif self._match_text_seq("CACHE"): 2103 # T-SQL allows empty CACHE which is initialized dynamically 2104 seq.set("cache", self._parse_number() or True) 2105 elif self._match_text_seq("OWNED", "BY"): 2106 # "OWNED BY NONE" is the default 2107 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2108 else: 2109 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2110 if opt: 2111 options.append(opt) 2112 else: 2113 break 2114 2115 seq.set("options", options if options else None) 2116 return None if self._index == index else seq 2117 2118 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2119 # only used for teradata currently 2120 self._match(TokenType.COMMA) 2121 2122 kwargs = { 2123 "no": self._match_text_seq("NO"), 2124 "dual": self._match_text_seq("DUAL"), 2125 "before": self._match_text_seq("BEFORE"), 2126 "default": self._match_text_seq("DEFAULT"), 2127 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2128 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2129 "after": self._match_text_seq("AFTER"), 2130 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2131 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2132 } 2133 2134 if self._match_texts(self.PROPERTY_PARSERS): 2135 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2136 try: 2137 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2138 except TypeError: 2139 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2140 2141 return None 2142 2143 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2144 return self._parse_wrapped_csv(self._parse_property) 2145 2146 def _parse_property(self) -> t.Optional[exp.Expression]: 2147 if self._match_texts(self.PROPERTY_PARSERS): 2148 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2149 2150 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2151 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2152 2153 if self._match_text_seq("COMPOUND", "SORTKEY"): 2154 return self._parse_sortkey(compound=True) 2155 2156 if self._match_text_seq("SQL", "SECURITY"): 2157 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2158 2159 index = self._index 2160 key = self._parse_column() 2161 2162 if not self._match(TokenType.EQ): 2163 self._retreat(index) 2164 return self._parse_sequence_properties() 2165 2166 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2167 if isinstance(key, exp.Column): 2168 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2169 2170 value = self._parse_bitwise() or self._parse_var(any_token=True) 2171 2172 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2173 if isinstance(value, exp.Column): 2174 value = exp.var(value.name) 2175 2176 return self.expression(exp.Property, this=key, value=value) 2177 2178 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2179 if self._match_text_seq("BY"): 2180 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2181 2182 self._match(TokenType.ALIAS) 2183 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2184 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2185 2186 return self.expression( 2187 exp.FileFormatProperty, 2188 this=( 2189 self.expression( 2190 exp.InputOutputFormat, 2191 input_format=input_format, 2192 output_format=output_format, 2193 ) 2194 if input_format or output_format 2195 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2196 ), 2197 ) 2198 2199 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2200 field = self._parse_field() 2201 if isinstance(field, exp.Identifier) and not field.quoted: 2202 field = exp.var(field) 2203 2204 return field 2205 2206 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2207 self._match(TokenType.EQ) 2208 self._match(TokenType.ALIAS) 2209 2210 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2211 2212 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2213 properties = [] 2214 while True: 2215 if before: 2216 prop = self._parse_property_before() 2217 else: 2218 prop = self._parse_property() 2219 if not prop: 2220 break 2221 for p in ensure_list(prop): 2222 properties.append(p) 2223 2224 if properties: 2225 return self.expression(exp.Properties, expressions=properties) 2226 2227 return None 2228 2229 
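
    # Illustrative sketch (not part of the original source): what the property loop
    # above typically yields, assuming the default dialect accepts a WITH (...) block:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one("CREATE TABLE t (c INT) WITH (fillfactor = 70)")
    #   props = ast.args.get("properties")
    #   assert isinstance(props, exp.Properties)
    #   assert all(isinstance(p, exp.Property) for p in props.expressions)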
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2230 return self.expression( 2231 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2232 ) 2233 2234 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2235 if self._match_texts(("DEFINER", "INVOKER")): 2236 security_specifier = self._prev.text.upper() 2237 return self.expression(exp.SecurityProperty, this=security_specifier) 2238 return None 2239 2240 def _parse_settings_property(self) -> exp.SettingsProperty: 2241 return self.expression( 2242 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2243 ) 2244 2245 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2246 if self._index >= 2: 2247 pre_volatile_token = self._tokens[self._index - 2] 2248 else: 2249 pre_volatile_token = None 2250 2251 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2252 return exp.VolatileProperty() 2253 2254 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2255 2256 def _parse_retention_period(self) -> exp.Var: 2257 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2258 number = self._parse_number() 2259 number_str = f"{number} " if number else "" 2260 unit = self._parse_var(any_token=True) 2261 return exp.var(f"{number_str}{unit}") 2262 2263 def _parse_system_versioning_property( 2264 self, with_: bool = False 2265 ) -> exp.WithSystemVersioningProperty: 2266 self._match(TokenType.EQ) 2267 prop = self.expression( 2268 exp.WithSystemVersioningProperty, 2269 **{ # type: ignore 2270 "on": True, 2271 "with": with_, 2272 }, 2273 ) 2274 2275 if self._match_text_seq("OFF"): 2276 prop.set("on", False) 2277 return prop 2278 2279 self._match(TokenType.ON) 2280 if self._match(TokenType.L_PAREN): 2281 while self._curr and not self._match(TokenType.R_PAREN): 2282 if self._match_text_seq("HISTORY_TABLE", "="): 2283 prop.set("this", self._parse_table_parts()) 2284 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2285 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2286 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2287 prop.set("retention_period", self._parse_retention_period()) 2288 2289 self._match(TokenType.COMMA) 2290 2291 return prop 2292 2293 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2294 self._match(TokenType.EQ) 2295 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2296 prop = self.expression(exp.DataDeletionProperty, on=on) 2297 2298 if self._match(TokenType.L_PAREN): 2299 while self._curr and not self._match(TokenType.R_PAREN): 2300 if self._match_text_seq("FILTER_COLUMN", "="): 2301 prop.set("filter_column", self._parse_column()) 2302 elif self._match_text_seq("RETENTION_PERIOD", "="): 2303 prop.set("retention_period", self._parse_retention_period()) 2304 2305 self._match(TokenType.COMMA) 2306 2307 return prop 2308 2309 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2310 kind = "HASH" 2311 expressions: t.Optional[t.List[exp.Expression]] = None 2312 if self._match_text_seq("BY", "HASH"): 2313 expressions = self._parse_wrapped_csv(self._parse_id_var) 2314 elif self._match_text_seq("BY", "RANDOM"): 2315 kind = "RANDOM" 2316 2317 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2318 buckets: t.Optional[exp.Expression] = None 2319 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2320 buckets = 
self._parse_number() 2321 2322 return self.expression( 2323 exp.DistributedByProperty, 2324 expressions=expressions, 2325 kind=kind, 2326 buckets=buckets, 2327 order=self._parse_order(), 2328 ) 2329 2330 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2331 self._match_text_seq("KEY") 2332 expressions = self._parse_wrapped_id_vars() 2333 return self.expression(expr_type, expressions=expressions) 2334 2335 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2336 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2337 prop = self._parse_system_versioning_property(with_=True) 2338 self._match_r_paren() 2339 return prop 2340 2341 if self._match(TokenType.L_PAREN, advance=False): 2342 return self._parse_wrapped_properties() 2343 2344 if self._match_text_seq("JOURNAL"): 2345 return self._parse_withjournaltable() 2346 2347 if self._match_texts(self.VIEW_ATTRIBUTES): 2348 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2349 2350 if self._match_text_seq("DATA"): 2351 return self._parse_withdata(no=False) 2352 elif self._match_text_seq("NO", "DATA"): 2353 return self._parse_withdata(no=True) 2354 2355 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2356 return self._parse_serde_properties(with_=True) 2357 2358 if self._match(TokenType.SCHEMA): 2359 return self.expression( 2360 exp.WithSchemaBindingProperty, 2361 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2362 ) 2363 2364 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2365 return self.expression( 2366 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2367 ) 2368 2369 if not self._next: 2370 return None 2371 2372 return self._parse_withisolatedloading() 2373 2374 def _parse_procedure_option(self) -> exp.Expression | None: 2375 if self._match_text_seq("EXECUTE", "AS"): 2376 return self.expression( 2377 exp.ExecuteAsProperty, 2378 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2379 or self._parse_string(), 2380 ) 2381 2382 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2383 2384 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2385 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2386 self._match(TokenType.EQ) 2387 2388 user = self._parse_id_var() 2389 self._match(TokenType.PARAMETER) 2390 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2391 2392 if not user or not host: 2393 return None 2394 2395 return exp.DefinerProperty(this=f"{user}@{host}") 2396 2397 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2398 self._match(TokenType.TABLE) 2399 self._match(TokenType.EQ) 2400 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2401 2402 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2403 return self.expression(exp.LogProperty, no=no) 2404 2405 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2406 return self.expression(exp.JournalProperty, **kwargs) 2407 2408 def _parse_checksum(self) -> exp.ChecksumProperty: 2409 self._match(TokenType.EQ) 2410 2411 on = None 2412 if self._match(TokenType.ON): 2413 on = True 2414 elif self._match_text_seq("OFF"): 2415 on = False 2416 2417 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2418 2419 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2420 return self.expression( 2421 exp.Cluster, 2422 expressions=( 2423 
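                # (Illustrative note, not in the original source: wrapped=True expects
                # the parenthesized form CLUSTER BY (a, b); otherwise CLUSTER BY a, b.)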
self._parse_wrapped_csv(self._parse_ordered) 2424 if wrapped 2425 else self._parse_csv(self._parse_ordered) 2426 ), 2427 ) 2428 2429 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2430 self._match_text_seq("BY") 2431 2432 self._match_l_paren() 2433 expressions = self._parse_csv(self._parse_column) 2434 self._match_r_paren() 2435 2436 if self._match_text_seq("SORTED", "BY"): 2437 self._match_l_paren() 2438 sorted_by = self._parse_csv(self._parse_ordered) 2439 self._match_r_paren() 2440 else: 2441 sorted_by = None 2442 2443 self._match(TokenType.INTO) 2444 buckets = self._parse_number() 2445 self._match_text_seq("BUCKETS") 2446 2447 return self.expression( 2448 exp.ClusteredByProperty, 2449 expressions=expressions, 2450 sorted_by=sorted_by, 2451 buckets=buckets, 2452 ) 2453 2454 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2455 if not self._match_text_seq("GRANTS"): 2456 self._retreat(self._index - 1) 2457 return None 2458 2459 return self.expression(exp.CopyGrantsProperty) 2460 2461 def _parse_freespace(self) -> exp.FreespaceProperty: 2462 self._match(TokenType.EQ) 2463 return self.expression( 2464 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2465 ) 2466 2467 def _parse_mergeblockratio( 2468 self, no: bool = False, default: bool = False 2469 ) -> exp.MergeBlockRatioProperty: 2470 if self._match(TokenType.EQ): 2471 return self.expression( 2472 exp.MergeBlockRatioProperty, 2473 this=self._parse_number(), 2474 percent=self._match(TokenType.PERCENT), 2475 ) 2476 2477 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2478 2479 def _parse_datablocksize( 2480 self, 2481 default: t.Optional[bool] = None, 2482 minimum: t.Optional[bool] = None, 2483 maximum: t.Optional[bool] = None, 2484 ) -> exp.DataBlocksizeProperty: 2485 self._match(TokenType.EQ) 2486 size = self._parse_number() 2487 2488 units = None 2489 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2490 units = self._prev.text 2491 2492 return self.expression( 2493 exp.DataBlocksizeProperty, 2494 size=size, 2495 units=units, 2496 default=default, 2497 minimum=minimum, 2498 maximum=maximum, 2499 ) 2500 2501 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2502 self._match(TokenType.EQ) 2503 always = self._match_text_seq("ALWAYS") 2504 manual = self._match_text_seq("MANUAL") 2505 never = self._match_text_seq("NEVER") 2506 default = self._match_text_seq("DEFAULT") 2507 2508 autotemp = None 2509 if self._match_text_seq("AUTOTEMP"): 2510 autotemp = self._parse_schema() 2511 2512 return self.expression( 2513 exp.BlockCompressionProperty, 2514 always=always, 2515 manual=manual, 2516 never=never, 2517 default=default, 2518 autotemp=autotemp, 2519 ) 2520 2521 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2522 index = self._index 2523 no = self._match_text_seq("NO") 2524 concurrent = self._match_text_seq("CONCURRENT") 2525 2526 if not self._match_text_seq("ISOLATED", "LOADING"): 2527 self._retreat(index) 2528 return None 2529 2530 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2531 return self.expression( 2532 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2533 ) 2534 2535 def _parse_locking(self) -> exp.LockingProperty: 2536 if self._match(TokenType.TABLE): 2537 kind = "TABLE" 2538 elif self._match(TokenType.VIEW): 2539 kind = "VIEW" 2540 elif self._match(TokenType.ROW): 2541 kind = "ROW" 2542 elif 
self._match_text_seq("DATABASE"): 2543 kind = "DATABASE" 2544 else: 2545 kind = None 2546 2547 if kind in ("DATABASE", "TABLE", "VIEW"): 2548 this = self._parse_table_parts() 2549 else: 2550 this = None 2551 2552 if self._match(TokenType.FOR): 2553 for_or_in = "FOR" 2554 elif self._match(TokenType.IN): 2555 for_or_in = "IN" 2556 else: 2557 for_or_in = None 2558 2559 if self._match_text_seq("ACCESS"): 2560 lock_type = "ACCESS" 2561 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2562 lock_type = "EXCLUSIVE" 2563 elif self._match_text_seq("SHARE"): 2564 lock_type = "SHARE" 2565 elif self._match_text_seq("READ"): 2566 lock_type = "READ" 2567 elif self._match_text_seq("WRITE"): 2568 lock_type = "WRITE" 2569 elif self._match_text_seq("CHECKSUM"): 2570 lock_type = "CHECKSUM" 2571 else: 2572 lock_type = None 2573 2574 override = self._match_text_seq("OVERRIDE") 2575 2576 return self.expression( 2577 exp.LockingProperty, 2578 this=this, 2579 kind=kind, 2580 for_or_in=for_or_in, 2581 lock_type=lock_type, 2582 override=override, 2583 ) 2584 2585 def _parse_partition_by(self) -> t.List[exp.Expression]: 2586 if self._match(TokenType.PARTITION_BY): 2587 return self._parse_csv(self._parse_assignment) 2588 return [] 2589 2590 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2591 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2592 if self._match_text_seq("MINVALUE"): 2593 return exp.var("MINVALUE") 2594 if self._match_text_seq("MAXVALUE"): 2595 return exp.var("MAXVALUE") 2596 return self._parse_bitwise() 2597 2598 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2599 expression = None 2600 from_expressions = None 2601 to_expressions = None 2602 2603 if self._match(TokenType.IN): 2604 this = self._parse_wrapped_csv(self._parse_bitwise) 2605 elif self._match(TokenType.FROM): 2606 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2607 self._match_text_seq("TO") 2608 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2609 elif self._match_text_seq("WITH", "(", "MODULUS"): 2610 this = self._parse_number() 2611 self._match_text_seq(",", "REMAINDER") 2612 expression = self._parse_number() 2613 self._match_r_paren() 2614 else: 2615 self.raise_error("Failed to parse partition bound spec.") 2616 2617 return self.expression( 2618 exp.PartitionBoundSpec, 2619 this=this, 2620 expression=expression, 2621 from_expressions=from_expressions, 2622 to_expressions=to_expressions, 2623 ) 2624 2625 # https://www.postgresql.org/docs/current/sql-createtable.html 2626 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2627 if not self._match_text_seq("OF"): 2628 self._retreat(self._index - 1) 2629 return None 2630 2631 this = self._parse_table(schema=True) 2632 2633 if self._match(TokenType.DEFAULT): 2634 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2635 elif self._match_text_seq("FOR", "VALUES"): 2636 expression = self._parse_partition_bound_spec() 2637 else: 2638 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2639 2640 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2641 2642 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2643 self._match(TokenType.EQ) 2644 return self.expression( 2645 exp.PartitionedByProperty, 2646 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2647 ) 2648 2649 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2650 if self._match_text_seq("AND", "STATISTICS"): 2651 
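            # (Illustrative, not in the original source: this parses the Teradata-style
            # "CREATE TABLE ... AS ... WITH [NO] DATA AND [NO] STATISTICS" suffix.)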
statistics = True 2652 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2653 statistics = False 2654 else: 2655 statistics = None 2656 2657 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2658 2659 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2660 if self._match_text_seq("SQL"): 2661 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2662 return None 2663 2664 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2665 if self._match_text_seq("SQL", "DATA"): 2666 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2667 return None 2668 2669 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2670 if self._match_text_seq("PRIMARY", "INDEX"): 2671 return exp.NoPrimaryIndexProperty() 2672 if self._match_text_seq("SQL"): 2673 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2674 return None 2675 2676 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2677 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2678 return exp.OnCommitProperty() 2679 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2680 return exp.OnCommitProperty(delete=True) 2681 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2682 2683 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2684 if self._match_text_seq("SQL", "DATA"): 2685 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2686 return None 2687 2688 def _parse_distkey(self) -> exp.DistKeyProperty: 2689 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2690 2691 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2692 table = self._parse_table(schema=True) 2693 2694 options = [] 2695 while self._match_texts(("INCLUDING", "EXCLUDING")): 2696 this = self._prev.text.upper() 2697 2698 id_var = self._parse_id_var() 2699 if not id_var: 2700 return None 2701 2702 options.append( 2703 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2704 ) 2705 2706 return self.expression(exp.LikeProperty, this=table, expressions=options) 2707 2708 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2709 return self.expression( 2710 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2711 ) 2712 2713 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2714 self._match(TokenType.EQ) 2715 return self.expression( 2716 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2717 ) 2718 2719 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2720 self._match_text_seq("WITH", "CONNECTION") 2721 return self.expression( 2722 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2723 ) 2724 2725 def _parse_returns(self) -> exp.ReturnsProperty: 2726 value: t.Optional[exp.Expression] 2727 null = None 2728 is_table = self._match(TokenType.TABLE) 2729 2730 if is_table: 2731 if self._match(TokenType.LT): 2732 value = self.expression( 2733 exp.Schema, 2734 this="TABLE", 2735 expressions=self._parse_csv(self._parse_struct_types), 2736 ) 2737 if not self._match(TokenType.GT): 2738 self.raise_error("Expecting >") 2739 else: 2740 value = self._parse_schema(exp.var("TABLE")) 2741 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2742 null = True 2743 value = None 2744 else: 2745 value = self._parse_types() 2746 2747 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2748 2749 def _parse_describe(self) -> exp.Describe: 2750 kind = self._match_set(self.CREATABLES) and self._prev.text 2751 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2752 if self._match(TokenType.DOT): 2753 style = None 2754 self._retreat(self._index - 2) 2755 2756 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2757 2758 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2759 this = self._parse_statement() 2760 else: 2761 this = self._parse_table(schema=True) 2762 2763 properties = self._parse_properties() 2764 expressions = properties.expressions if properties else None 2765 partition = self._parse_partition() 2766 return self.expression( 2767 exp.Describe, 2768 this=this, 2769 style=style, 2770 kind=kind, 2771 expressions=expressions, 2772 partition=partition, 2773 format=format, 2774 ) 2775 2776 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2777 kind = self._prev.text.upper() 2778 expressions = [] 2779 2780 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2781 if self._match(TokenType.WHEN): 2782 expression = self._parse_disjunction() 2783 self._match(TokenType.THEN) 2784 else: 2785 expression = None 2786 2787 else_ = self._match(TokenType.ELSE) 2788 2789 if not self._match(TokenType.INTO): 2790 return None 2791 2792 return self.expression( 2793 exp.ConditionalInsert, 2794 this=self.expression( 2795 exp.Insert, 2796 this=self._parse_table(schema=True), 2797 expression=self._parse_derived_table_values(), 2798 ), 2799 expression=expression, 2800 else_=else_, 2801 ) 2802 2803 expression = parse_conditional_insert() 2804 while expression is not None: 2805 expressions.append(expression) 2806 expression = parse_conditional_insert() 2807 2808 return self.expression( 2809 exp.MultitableInserts, 2810 kind=kind, 2811 comments=comments, 2812 expressions=expressions, 2813 source=self._parse_table(), 2814 ) 2815 2816 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2817 comments = [] 2818 hint = self._parse_hint() 2819 overwrite = self._match(TokenType.OVERWRITE) 2820 ignore = self._match(TokenType.IGNORE) 2821 local = self._match_text_seq("LOCAL") 2822 alternative = None 2823 is_function = None 2824 2825 if self._match_text_seq("DIRECTORY"): 2826 this: t.Optional[exp.Expression] = self.expression( 2827 exp.Directory, 2828 this=self._parse_var_or_string(), 2829 local=local, 2830 row_format=self._parse_row_format(match_row=True), 2831 ) 2832 else: 2833 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2834 comments += ensure_list(self._prev_comments) 2835 return self._parse_multitable_inserts(comments) 2836 2837 if self._match(TokenType.OR): 2838 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2839 2840 self._match(TokenType.INTO) 2841 comments += ensure_list(self._prev_comments) 2842 self._match(TokenType.TABLE) 2843 is_function = self._match(TokenType.FUNCTION) 2844 2845 this = ( 2846 self._parse_table(schema=True, parse_partition=True) 2847 if not is_function 2848 else self._parse_function() 2849 ) 2850 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2851 this.set("alias", self._parse_table_alias()) 2852 2853 returning = self._parse_returning() 2854 2855 return self.expression( 2856 exp.Insert, 2857 comments=comments, 2858 hint=hint, 2859 is_function=is_function, 2860 this=this, 
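            # (Illustrative note, not in the original source: the keyword arguments
            # below are evaluated left to right, and each _match/_parse consumes tokens
            # as a side effect, so their order mirrors the grammar of INSERT suffixes.)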
2861 stored=self._match_text_seq("STORED") and self._parse_stored(), 2862 by_name=self._match_text_seq("BY", "NAME"), 2863 exists=self._parse_exists(), 2864 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2865 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2866 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2867 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2868 conflict=self._parse_on_conflict(), 2869 returning=returning or self._parse_returning(), 2870 overwrite=overwrite, 2871 alternative=alternative, 2872 ignore=ignore, 2873 source=self._match(TokenType.TABLE) and self._parse_table(), 2874 ) 2875 2876 def _parse_kill(self) -> exp.Kill: 2877 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2878 2879 return self.expression( 2880 exp.Kill, 2881 this=self._parse_primary(), 2882 kind=kind, 2883 ) 2884 2885 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2886 conflict = self._match_text_seq("ON", "CONFLICT") 2887 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2888 2889 if not conflict and not duplicate: 2890 return None 2891 2892 conflict_keys = None 2893 constraint = None 2894 2895 if conflict: 2896 if self._match_text_seq("ON", "CONSTRAINT"): 2897 constraint = self._parse_id_var() 2898 elif self._match(TokenType.L_PAREN): 2899 conflict_keys = self._parse_csv(self._parse_id_var) 2900 self._match_r_paren() 2901 2902 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2903 if self._prev.token_type == TokenType.UPDATE: 2904 self._match(TokenType.SET) 2905 expressions = self._parse_csv(self._parse_equality) 2906 else: 2907 expressions = None 2908 2909 return self.expression( 2910 exp.OnConflict, 2911 duplicate=duplicate, 2912 expressions=expressions, 2913 action=action, 2914 conflict_keys=conflict_keys, 2915 constraint=constraint, 2916 where=self._parse_where(), 2917 ) 2918 2919 def _parse_returning(self) -> t.Optional[exp.Returning]: 2920 if not self._match(TokenType.RETURNING): 2921 return None 2922 return self.expression( 2923 exp.Returning, 2924 expressions=self._parse_csv(self._parse_expression), 2925 into=self._match(TokenType.INTO) and self._parse_table_part(), 2926 ) 2927 2928 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2929 if not self._match(TokenType.FORMAT): 2930 return None 2931 return self._parse_row_format() 2932 2933 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2934 index = self._index 2935 with_ = with_ or self._match_text_seq("WITH") 2936 2937 if not self._match(TokenType.SERDE_PROPERTIES): 2938 self._retreat(index) 2939 return None 2940 return self.expression( 2941 exp.SerdeProperties, 2942 **{ # type: ignore 2943 "expressions": self._parse_wrapped_properties(), 2944 "with": with_, 2945 }, 2946 ) 2947 2948 def _parse_row_format( 2949 self, match_row: bool = False 2950 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2951 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2952 return None 2953 2954 if self._match_text_seq("SERDE"): 2955 this = self._parse_string() 2956 2957 serde_properties = self._parse_serde_properties() 2958 2959 return self.expression( 2960 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2961 ) 2962 2963 self._match_text_seq("DELIMITED") 2964 2965 kwargs = {} 2966 2967 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2968 kwargs["fields"] = self._parse_string() 2969 if self._match_text_seq("ESCAPED", "BY"): 2970 kwargs["escaped"] = self._parse_string() 2971 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2972 kwargs["collection_items"] = self._parse_string() 2973 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2974 kwargs["map_keys"] = self._parse_string() 2975 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2976 kwargs["lines"] = self._parse_string() 2977 if self._match_text_seq("NULL", "DEFINED", "AS"): 2978 kwargs["null"] = self._parse_string() 2979 2980 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2981 2982 def _parse_load(self) -> exp.LoadData | exp.Command: 2983 if self._match_text_seq("DATA"): 2984 local = self._match_text_seq("LOCAL") 2985 self._match_text_seq("INPATH") 2986 inpath = self._parse_string() 2987 overwrite = self._match(TokenType.OVERWRITE) 2988 self._match_pair(TokenType.INTO, TokenType.TABLE) 2989 2990 return self.expression( 2991 exp.LoadData, 2992 this=self._parse_table(schema=True), 2993 local=local, 2994 overwrite=overwrite, 2995 inpath=inpath, 2996 partition=self._parse_partition(), 2997 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2998 serde=self._match_text_seq("SERDE") and self._parse_string(), 2999 ) 3000 return self._parse_as_command(self._prev) 3001 3002 def _parse_delete(self) -> exp.Delete: 3003 # This handles MySQL's "Multiple-Table Syntax" 3004 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3005 tables = None 3006 if not self._match(TokenType.FROM, advance=False): 3007 tables = self._parse_csv(self._parse_table) or None 3008 3009 returning = self._parse_returning() 3010 3011 return self.expression( 3012 exp.Delete, 3013 tables=tables, 3014 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3015 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3016 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3017 where=self._parse_where(), 3018 returning=returning or self._parse_returning(), 3019 limit=self._parse_limit(), 3020 ) 3021 3022 def _parse_update(self) -> exp.Update: 3023 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3024 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3025 returning = self._parse_returning() 3026 return self.expression( 3027 exp.Update, 3028 **{ # type: ignore 3029 "this": this, 3030 "expressions": expressions, 3031 "from": self._parse_from(joins=True), 3032 "where": self._parse_where(), 3033 "returning": returning or self._parse_returning(), 3034 "order": self._parse_order(), 3035 "limit": self._parse_limit(), 3036 }, 3037 ) 3038 3039 def _parse_use(self) -> exp.Use: 3040 return self.expression( 3041 exp.Use, 3042 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3043 this=self._parse_table(schema=False), 3044 ) 3045 3046 def _parse_uncache(self) -> exp.Uncache: 3047 if not self._match(TokenType.TABLE): 3048 self.raise_error("Expecting TABLE after UNCACHE") 3049 3050 return self.expression( 3051 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3052 ) 3053 3054 def _parse_cache(self) -> exp.Cache: 3055 lazy = self._match_text_seq("LAZY") 3056 self._match(TokenType.TABLE) 3057 table = self._parse_table(schema=True) 3058 3059 options = [] 3060 if self._match_text_seq("OPTIONS"): 3061 self._match_l_paren() 3062 k = 
self._parse_string()
3063            self._match(TokenType.EQ)
3064            v = self._parse_string()
3065            options = [k, v]
3066            self._match_r_paren()
3067
3068        self._match(TokenType.ALIAS)
3069        return self.expression(
3070            exp.Cache,
3071            this=table,
3072            lazy=lazy,
3073            options=options,
3074            expression=self._parse_select(nested=True),
3075        )
3076
3077    def _parse_partition(self) -> t.Optional[exp.Partition]:
3078        if not self._match_texts(self.PARTITION_KEYWORDS):
3079            return None
3080
3081        return self.expression(
3082            exp.Partition,
3083            subpartition=self._prev.text.upper() == "SUBPARTITION",
3084            expressions=self._parse_wrapped_csv(self._parse_assignment),
3085        )
3086
3087    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
3088        def _parse_value_expression() -> t.Optional[exp.Expression]:
3089            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
3090                return exp.var(self._prev.text.upper())
3091            return self._parse_expression()
3092
3093        if self._match(TokenType.L_PAREN):
3094            expressions = self._parse_csv(_parse_value_expression)
3095            self._match_r_paren()
3096            return self.expression(exp.Tuple, expressions=expressions)
3097
3098        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
3099        expression = self._parse_expression()
3100        if expression:
3101            return self.expression(exp.Tuple, expressions=[expression])
3102        return None
3103
3104    def _parse_projections(self) -> t.List[exp.Expression]:
3105        return self._parse_expressions()
3106
3107    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
3108        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
3109            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
3110                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
3111            )
3112        elif self._match(TokenType.FROM):
3113            from_ = self._parse_from(skip_from_token=True)
3114            # Support parentheses for duckdb FROM-first syntax
3115            select = self._parse_select()
3116            if select:
3117                select.set("from", from_)
3118                this = select
3119            else:
3120                this = exp.select("*").from_(t.cast(exp.From, from_))
3121        else:
3122            this = (
3123                self._parse_table()
3124                if table
3125                else self._parse_select(nested=True, parse_set_operation=False)
3126            )
3127
3128        # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
3129        # in case a modifier (e.g.
join) follows
3130        if table and isinstance(this, exp.Values) and this.alias:
3131            alias = this.args["alias"].pop()
3132            this = exp.Table(this=this, alias=alias)
3133
3134        this = self._parse_query_modifiers(self._parse_set_operations(this))
3135
3136        return this
3137
3138    def _parse_select(
3139        self,
3140        nested: bool = False,
3141        table: bool = False,
3142        parse_subquery_alias: bool = True,
3143        parse_set_operation: bool = True,
3144    ) -> t.Optional[exp.Expression]:
3145        cte = self._parse_with()
3146
3147        if cte:
3148            this = self._parse_statement()
3149
3150            if not this:
3151                self.raise_error("Failed to parse any statement following CTE")
3152                return cte
3153
3154            if "with" in this.arg_types:
3155                this.set("with", cte)
3156            else:
3157                self.raise_error(f"{this.key} does not support CTE")
3158                this = cte
3159
3160            return this
3161
3162        # DuckDB supports a leading FROM clause, e.g. FROM x
3163        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None
3164
3165        if self._match(TokenType.SELECT):
3166            comments = self._prev_comments
3167
3168            hint = self._parse_hint()
3169
3170            if self._next and not self._next.token_type == TokenType.DOT:
3171                all_ = self._match(TokenType.ALL)
3172                distinct = self._match_set(self.DISTINCT_TOKENS)
3173            else:
3174                all_, distinct = None, None
3175
3176            kind = (
3177                self._match(TokenType.ALIAS)
3178                and self._match_texts(("STRUCT", "VALUE"))
3179                and self._prev.text.upper()
3180            )
3181
3182            if distinct:
3183                distinct = self.expression(
3184                    exp.Distinct,
3185                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
3186                )
3187
3188            if all_ and distinct:
3189                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
3190
3191            operation_modifiers = []
3192            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
3193                operation_modifiers.append(exp.var(self._prev.text.upper()))
3194
3195            limit = self._parse_limit(top=True)
3196            projections = self._parse_projections()
3197
3198            this = self.expression(
3199                exp.Select,
3200                kind=kind,
3201                hint=hint,
3202                distinct=distinct,
3203                expressions=projections,
3204                limit=limit,
3205                operation_modifiers=operation_modifiers or None,
3206            )
3207            this.comments = comments
3208
3209            into = self._parse_into()
3210            if into:
3211                this.set("into", into)
3212
3213            if not from_:
3214                from_ = self._parse_from()
3215
3216            if from_:
3217                this.set("from", from_)
3218
3219            this = self._parse_query_modifiers(this)
3220        elif (table or nested) and self._match(TokenType.L_PAREN):
3221            this = self._parse_wrapped_select(table=table)
3222
3223            # We return early here so that the UNION isn't attached to the subquery by the
3224            # following call to _parse_set_operations, but instead becomes the parent node
3225            self._match_r_paren()
3226            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
3227        elif self._match(TokenType.VALUES, advance=False):
3228            this = self._parse_derived_table_values()
3229        elif from_:
3230            this = exp.select("*").from_(from_.this, copy=False)
3231        elif self._match(TokenType.SUMMARIZE):
3232            table = self._match(TokenType.TABLE)
3233            this = self._parse_select() or self._parse_string() or self._parse_table()
3234            return self.expression(exp.Summarize, this=this, table=table)
3235        elif self._match(TokenType.DESCRIBE):
3236            this = self._parse_describe()
3237        elif self._match_text_seq("STREAM"):
3238            this = self._parse_function()
3239            if this:
3240                this = self.expression(exp.Stream, this=this)
3241            else:
3242                self._retreat(self._index - 1)
3243        else:
3244            this = None
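
        # (Illustrative note, not in the original source: parse_set_operation=False is
        # used by callers such as _parse_wrapped_select so that a trailing UNION binds
        # to the parent query rather than to the nested SELECT parsed here.)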
3245 3246 return self._parse_set_operations(this) if parse_set_operation else this 3247 3248 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3249 self._match_text_seq("SEARCH") 3250 3251 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3252 3253 if not kind: 3254 return None 3255 3256 self._match_text_seq("FIRST", "BY") 3257 3258 return self.expression( 3259 exp.RecursiveWithSearch, 3260 kind=kind, 3261 this=self._parse_id_var(), 3262 expression=self._match_text_seq("SET") and self._parse_id_var(), 3263 using=self._match_text_seq("USING") and self._parse_id_var(), 3264 ) 3265 3266 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3267 if not skip_with_token and not self._match(TokenType.WITH): 3268 return None 3269 3270 comments = self._prev_comments 3271 recursive = self._match(TokenType.RECURSIVE) 3272 3273 last_comments = None 3274 expressions = [] 3275 while True: 3276 cte = self._parse_cte() 3277 if isinstance(cte, exp.CTE): 3278 expressions.append(cte) 3279 if last_comments: 3280 cte.add_comments(last_comments) 3281 3282 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3283 break 3284 else: 3285 self._match(TokenType.WITH) 3286 3287 last_comments = self._prev_comments 3288 3289 return self.expression( 3290 exp.With, 3291 comments=comments, 3292 expressions=expressions, 3293 recursive=recursive, 3294 search=self._parse_recursive_with_search(), 3295 ) 3296 3297 def _parse_cte(self) -> t.Optional[exp.CTE]: 3298 index = self._index 3299 3300 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3301 if not alias or not alias.this: 3302 self.raise_error("Expected CTE to have alias") 3303 3304 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3305 self._retreat(index) 3306 return None 3307 3308 comments = self._prev_comments 3309 3310 if self._match_text_seq("NOT", "MATERIALIZED"): 3311 materialized = False 3312 elif self._match_text_seq("MATERIALIZED"): 3313 materialized = True 3314 else: 3315 materialized = None 3316 3317 cte = self.expression( 3318 exp.CTE, 3319 this=self._parse_wrapped(self._parse_statement), 3320 alias=alias, 3321 materialized=materialized, 3322 comments=comments, 3323 ) 3324 3325 if isinstance(cte.this, exp.Values): 3326 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3327 3328 return cte 3329 3330 def _parse_table_alias( 3331 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3332 ) -> t.Optional[exp.TableAlias]: 3333 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3334 # so this section tries to parse the clause version and if it fails, it treats the token 3335 # as an identifier (alias) 3336 if self._can_parse_limit_or_offset(): 3337 return None 3338 3339 any_token = self._match(TokenType.ALIAS) 3340 alias = ( 3341 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3342 or self._parse_string_as_identifier() 3343 ) 3344 3345 index = self._index 3346 if self._match(TokenType.L_PAREN): 3347 columns = self._parse_csv(self._parse_function_parameter) 3348 self._match_r_paren() if columns else self._retreat(index) 3349 else: 3350 columns = None 3351 3352 if not alias and not columns: 3353 return None 3354 3355 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3356 3357 # We bubble up comments from the Identifier to the TableAlias 3358 if isinstance(alias, exp.Identifier): 3359 
table_alias.add_comments(alias.pop_comments()) 3360 3361 return table_alias 3362 3363 def _parse_subquery( 3364 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3365 ) -> t.Optional[exp.Subquery]: 3366 if not this: 3367 return None 3368 3369 return self.expression( 3370 exp.Subquery, 3371 this=this, 3372 pivots=self._parse_pivots(), 3373 alias=self._parse_table_alias() if parse_alias else None, 3374 sample=self._parse_table_sample(), 3375 ) 3376 3377 def _implicit_unnests_to_explicit(self, this: E) -> E: 3378 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3379 3380 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3381 for i, join in enumerate(this.args.get("joins") or []): 3382 table = join.this 3383 normalized_table = table.copy() 3384 normalized_table.meta["maybe_column"] = True 3385 normalized_table = _norm(normalized_table, dialect=self.dialect) 3386 3387 if isinstance(table, exp.Table) and not join.args.get("on"): 3388 if normalized_table.parts[0].name in refs: 3389 table_as_column = table.to_column() 3390 unnest = exp.Unnest(expressions=[table_as_column]) 3391 3392 # Table.to_column creates a parent Alias node that we want to convert to 3393 # a TableAlias and attach to the Unnest, so it matches the parser's output 3394 if isinstance(table.args.get("alias"), exp.TableAlias): 3395 table_as_column.replace(table_as_column.this) 3396 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3397 3398 table.replace(unnest) 3399 3400 refs.add(normalized_table.alias_or_name) 3401 3402 return this 3403 3404 def _parse_query_modifiers( 3405 self, this: t.Optional[exp.Expression] 3406 ) -> t.Optional[exp.Expression]: 3407 if isinstance(this, self.MODIFIABLES): 3408 for join in self._parse_joins(): 3409 this.append("joins", join) 3410 for lateral in iter(self._parse_lateral, None): 3411 this.append("laterals", lateral) 3412 3413 while True: 3414 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3415 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3416 key, expression = parser(self) 3417 3418 if expression: 3419 this.set(key, expression) 3420 if key == "limit": 3421 offset = expression.args.pop("offset", None) 3422 3423 if offset: 3424 offset = exp.Offset(expression=offset) 3425 this.set("offset", offset) 3426 3427 limit_by_expressions = expression.expressions 3428 expression.set("expressions", None) 3429 offset.set("expressions", limit_by_expressions) 3430 continue 3431 break 3432 3433 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3434 this = self._implicit_unnests_to_explicit(this) 3435 3436 return this 3437 3438 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3439 start = self._curr 3440 while self._curr: 3441 self._advance() 3442 3443 end = self._tokens[self._index - 1] 3444 return exp.Hint(expressions=[self._find_sql(start, end)]) 3445 3446 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3447 return self._parse_function_call() 3448 3449 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3450 start_index = self._index 3451 should_fallback_to_string = False 3452 3453 hints = [] 3454 try: 3455 for hint in iter( 3456 lambda: self._parse_csv( 3457 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3458 ), 3459 [], 3460 ): 3461 hints.extend(hint) 3462 except ParseError: 3463 should_fallback_to_string = True 3464 3465 if should_fallback_to_string or self._curr: 3466 self._retreat(start_index) 
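# If the structured parse failed, or tokens were left unconsumed, the whole hint body
# is preserved verbatim as a single string inside exp.Hint, so e.g. an Oracle-style
# hint with space-separated arguments such as /*+ LEADING(t1 t2) */ can still
# round-trip unchanged.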
3467 return self._parse_hint_fallback_to_string() 3468 3469 return self.expression(exp.Hint, expressions=hints) 3470 3471 def _parse_hint(self) -> t.Optional[exp.Hint]: 3472 if self._match(TokenType.HINT) and self._prev_comments: 3473 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3474 3475 return None 3476 3477 def _parse_into(self) -> t.Optional[exp.Into]: 3478 if not self._match(TokenType.INTO): 3479 return None 3480 3481 temp = self._match(TokenType.TEMPORARY) 3482 unlogged = self._match_text_seq("UNLOGGED") 3483 self._match(TokenType.TABLE) 3484 3485 return self.expression( 3486 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3487 ) 3488 3489 def _parse_from( 3490 self, joins: bool = False, skip_from_token: bool = False 3491 ) -> t.Optional[exp.From]: 3492 if not skip_from_token and not self._match(TokenType.FROM): 3493 return None 3494 3495 return self.expression( 3496 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3497 ) 3498 3499 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3500 return self.expression( 3501 exp.MatchRecognizeMeasure, 3502 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3503 this=self._parse_expression(), 3504 ) 3505 3506 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3507 if not self._match(TokenType.MATCH_RECOGNIZE): 3508 return None 3509 3510 self._match_l_paren() 3511 3512 partition = self._parse_partition_by() 3513 order = self._parse_order() 3514 3515 measures = ( 3516 self._parse_csv(self._parse_match_recognize_measure) 3517 if self._match_text_seq("MEASURES") 3518 else None 3519 ) 3520 3521 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3522 rows = exp.var("ONE ROW PER MATCH") 3523 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3524 text = "ALL ROWS PER MATCH" 3525 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3526 text += " SHOW EMPTY MATCHES" 3527 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3528 text += " OMIT EMPTY MATCHES" 3529 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3530 text += " WITH UNMATCHED ROWS" 3531 rows = exp.var(text) 3532 else: 3533 rows = None 3534 3535 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3536 text = "AFTER MATCH SKIP" 3537 if self._match_text_seq("PAST", "LAST", "ROW"): 3538 text += " PAST LAST ROW" 3539 elif self._match_text_seq("TO", "NEXT", "ROW"): 3540 text += " TO NEXT ROW" 3541 elif self._match_text_seq("TO", "FIRST"): 3542 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3543 elif self._match_text_seq("TO", "LAST"): 3544 text += f" TO LAST {self._advance_any().text}" # type: ignore 3545 after = exp.var(text) 3546 else: 3547 after = None 3548 3549 if self._match_text_seq("PATTERN"): 3550 self._match_l_paren() 3551 3552 if not self._curr: 3553 self.raise_error("Expecting )", self._curr) 3554 3555 paren = 1 3556 start = self._curr 3557 3558 while self._curr and paren > 0: 3559 if self._curr.token_type == TokenType.L_PAREN: 3560 paren += 1 3561 if self._curr.token_type == TokenType.R_PAREN: 3562 paren -= 1 3563 3564 end = self._prev 3565 self._advance() 3566 3567 if paren > 0: 3568 self.raise_error("Expecting )", self._curr) 3569 3570 pattern = exp.var(self._find_sql(start, end)) 3571 else: 3572 pattern = None 3573 3574 define = ( 3575 self._parse_csv(self._parse_name_as_expression) 3576 if self._match_text_seq("DEFINE") 3577 else None 3578 ) 3579 3580 self._match_r_paren() 3581 
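# All of the optional clauses parsed above are assembled below; a query of roughly
# this shape exercises them:
#   SELECT * FROM t MATCH_RECOGNIZE (
#     PARTITION BY a ORDER BY b MEASURES FIRST(b) AS fb
#     AFTER MATCH SKIP PAST LAST ROW PATTERN (x+) DEFINE x AS x.b > 0
#   ) AS mr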
3582 return self.expression( 3583 exp.MatchRecognize, 3584 partition_by=partition, 3585 order=order, 3586 measures=measures, 3587 rows=rows, 3588 after=after, 3589 pattern=pattern, 3590 define=define, 3591 alias=self._parse_table_alias(), 3592 ) 3593 3594 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3595 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3596 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3597 cross_apply = False 3598 3599 if cross_apply is not None: 3600 this = self._parse_select(table=True) 3601 view = None 3602 outer = None 3603 elif self._match(TokenType.LATERAL): 3604 this = self._parse_select(table=True) 3605 view = self._match(TokenType.VIEW) 3606 outer = self._match(TokenType.OUTER) 3607 else: 3608 return None 3609 3610 if not this: 3611 this = ( 3612 self._parse_unnest() 3613 or self._parse_function() 3614 or self._parse_id_var(any_token=False) 3615 ) 3616 3617 while self._match(TokenType.DOT): 3618 this = exp.Dot( 3619 this=this, 3620 expression=self._parse_function() or self._parse_id_var(any_token=False), 3621 ) 3622 3623 ordinality: t.Optional[bool] = None 3624 3625 if view: 3626 table = self._parse_id_var(any_token=False) 3627 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3628 table_alias: t.Optional[exp.TableAlias] = self.expression( 3629 exp.TableAlias, this=table, columns=columns 3630 ) 3631 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3632 # We move the alias from the lateral's child node to the lateral itself 3633 table_alias = this.args["alias"].pop() 3634 else: 3635 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3636 table_alias = self._parse_table_alias() 3637 3638 return self.expression( 3639 exp.Lateral, 3640 this=this, 3641 view=view, 3642 outer=outer, 3643 alias=table_alias, 3644 cross_apply=cross_apply, 3645 ordinality=ordinality, 3646 ) 3647 3648 def _parse_join_parts( 3649 self, 3650 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3651 return ( 3652 self._match_set(self.JOIN_METHODS) and self._prev, 3653 self._match_set(self.JOIN_SIDES) and self._prev, 3654 self._match_set(self.JOIN_KINDS) and self._prev, 3655 ) 3656 3657 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3658 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3659 this = self._parse_column() 3660 if isinstance(this, exp.Column): 3661 return this.this 3662 return this 3663 3664 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3665 3666 def _parse_join( 3667 self, skip_join_token: bool = False, parse_bracket: bool = False 3668 ) -> t.Optional[exp.Join]: 3669 if self._match(TokenType.COMMA): 3670 table = self._try_parse(self._parse_table) 3671 if table: 3672 return self.expression(exp.Join, this=table) 3673 return None 3674 3675 index = self._index 3676 method, side, kind = self._parse_join_parts() 3677 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3678 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3679 3680 if not skip_join_token and not join: 3681 self._retreat(index) 3682 kind = None 3683 method = None 3684 side = None 3685 3686 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3687 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3688 3689 if not skip_join_token and not join and not outer_apply and not cross_apply: 3690 return None 3691 3692 kwargs: 
t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3693 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3694 kwargs["expressions"] = self._parse_csv( 3695 lambda: self._parse_table(parse_bracket=parse_bracket) 3696 ) 3697 3698 if method: 3699 kwargs["method"] = method.text 3700 if side: 3701 kwargs["side"] = side.text 3702 if kind: 3703 kwargs["kind"] = kind.text 3704 if hint: 3705 kwargs["hint"] = hint 3706 3707 if self._match(TokenType.MATCH_CONDITION): 3708 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3709 3710 if self._match(TokenType.ON): 3711 kwargs["on"] = self._parse_assignment() 3712 elif self._match(TokenType.USING): 3713 kwargs["using"] = self._parse_using_identifiers() 3714 elif ( 3715 not (outer_apply or cross_apply) 3716 and not isinstance(kwargs["this"], exp.Unnest) 3717 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3718 ): 3719 index = self._index 3720 joins: t.Optional[list] = list(self._parse_joins()) 3721 3722 if joins and self._match(TokenType.ON): 3723 kwargs["on"] = self._parse_assignment() 3724 elif joins and self._match(TokenType.USING): 3725 kwargs["using"] = self._parse_using_identifiers() 3726 else: 3727 joins = None 3728 self._retreat(index) 3729 3730 kwargs["this"].set("joins", joins if joins else None) 3731 3732 comments = [c for token in (method, side, kind) if token for c in token.comments] 3733 return self.expression(exp.Join, comments=comments, **kwargs) 3734 3735 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3736 this = self._parse_assignment() 3737 3738 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3739 return this 3740 3741 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3742 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3743 3744 return this 3745 3746 def _parse_index_params(self) -> exp.IndexParameters: 3747 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3748 3749 if self._match(TokenType.L_PAREN, advance=False): 3750 columns = self._parse_wrapped_csv(self._parse_with_operator) 3751 else: 3752 columns = None 3753 3754 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3755 partition_by = self._parse_partition_by() 3756 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3757 tablespace = ( 3758 self._parse_var(any_token=True) 3759 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3760 else None 3761 ) 3762 where = self._parse_where() 3763 3764 on = self._parse_field() if self._match(TokenType.ON) else None 3765 3766 return self.expression( 3767 exp.IndexParameters, 3768 using=using, 3769 columns=columns, 3770 include=include, 3771 partition_by=partition_by, 3772 where=where, 3773 with_storage=with_storage, 3774 tablespace=tablespace, 3775 on=on, 3776 ) 3777 3778 def _parse_index( 3779 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3780 ) -> t.Optional[exp.Index]: 3781 if index or anonymous: 3782 unique = None 3783 primary = None 3784 amp = None 3785 3786 self._match(TokenType.ON) 3787 self._match(TokenType.TABLE) # hive 3788 table = self._parse_table_parts(schema=True) 3789 else: 3790 unique = self._match(TokenType.UNIQUE) 3791 primary = self._match_text_seq("PRIMARY") 3792 amp = self._match_text_seq("AMP") 3793 3794 if not self._match(TokenType.INDEX): 3795 return None 3796 3797 index = self._parse_id_var() 3798 table = None 
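# Both entry points converge here: the index/anonymous branch above covers the
# "... ON [TABLE] <table>" form, the other the "[UNIQUE] [PRIMARY] [AMP] INDEX <name>"
# form; the options shared by both (USING, columns, INCLUDE, PARTITION BY, WHERE, ...)
# are parsed next.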
3799 3800 params = self._parse_index_params() 3801 3802 return self.expression( 3803 exp.Index, 3804 this=index, 3805 table=table, 3806 unique=unique, 3807 primary=primary, 3808 amp=amp, 3809 params=params, 3810 ) 3811 3812 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3813 hints: t.List[exp.Expression] = [] 3814 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3815 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3816 hints.append( 3817 self.expression( 3818 exp.WithTableHint, 3819 expressions=self._parse_csv( 3820 lambda: self._parse_function() or self._parse_var(any_token=True) 3821 ), 3822 ) 3823 ) 3824 self._match_r_paren() 3825 else: 3826 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3827 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3828 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3829 3830 self._match_set((TokenType.INDEX, TokenType.KEY)) 3831 if self._match(TokenType.FOR): 3832 hint.set("target", self._advance_any() and self._prev.text.upper()) 3833 3834 hint.set("expressions", self._parse_wrapped_id_vars()) 3835 hints.append(hint) 3836 3837 return hints or None 3838 3839 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3840 return ( 3841 (not schema and self._parse_function(optional_parens=False)) 3842 or self._parse_id_var(any_token=False) 3843 or self._parse_string_as_identifier() 3844 or self._parse_placeholder() 3845 ) 3846 3847 def _parse_table_parts( 3848 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3849 ) -> exp.Table: 3850 catalog = None 3851 db = None 3852 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3853 3854 while self._match(TokenType.DOT): 3855 if catalog: 3856 # This allows nesting the table in arbitrarily many dot expressions if needed 3857 table = self.expression( 3858 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3859 ) 3860 else: 3861 catalog = db 3862 db = table 3863 # "" used for tsql FROM a..b case 3864 table = self._parse_table_part(schema=schema) or "" 3865 3866 if ( 3867 wildcard 3868 and self._is_connected() 3869 and (isinstance(table, exp.Identifier) or not table) 3870 and self._match(TokenType.STAR) 3871 ): 3872 if isinstance(table, exp.Identifier): 3873 table.args["this"] += "*" 3874 else: 3875 table = exp.Identifier(this="*") 3876 3877 # We bubble up comments from the Identifier to the Table 3878 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3879 3880 if is_db_reference: 3881 catalog = db 3882 db = table 3883 table = None 3884 3885 if not table and not is_db_reference: 3886 self.raise_error(f"Expected table name but got {self._curr}") 3887 if not db and is_db_reference: 3888 self.raise_error(f"Expected database name but got {self._curr}") 3889 3890 table = self.expression( 3891 exp.Table, 3892 comments=comments, 3893 this=table, 3894 db=db, 3895 catalog=catalog, 3896 ) 3897 3898 changes = self._parse_changes() 3899 if changes: 3900 table.set("changes", changes) 3901 3902 at_before = self._parse_historical_data() 3903 if at_before: 3904 table.set("when", at_before) 3905 3906 pivots = self._parse_pivots() 3907 if pivots: 3908 table.set("pivots", pivots) 3909 3910 return table 3911 3912 def _parse_table( 3913 self, 3914 schema: bool = False, 3915 joins: bool = False, 3916 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3917 parse_bracket: bool = False, 3918 is_db_reference: 
bool = False, 3919 parse_partition: bool = False, 3920 ) -> t.Optional[exp.Expression]: 3921 lateral = self._parse_lateral() 3922 if lateral: 3923 return lateral 3924 3925 unnest = self._parse_unnest() 3926 if unnest: 3927 return unnest 3928 3929 values = self._parse_derived_table_values() 3930 if values: 3931 return values 3932 3933 subquery = self._parse_select(table=True) 3934 if subquery: 3935 if not subquery.args.get("pivots"): 3936 subquery.set("pivots", self._parse_pivots()) 3937 return subquery 3938 3939 bracket = parse_bracket and self._parse_bracket(None) 3940 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3941 3942 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3943 self._parse_table 3944 ) 3945 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3946 3947 only = self._match(TokenType.ONLY) 3948 3949 this = t.cast( 3950 exp.Expression, 3951 bracket 3952 or rows_from 3953 or self._parse_bracket( 3954 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3955 ), 3956 ) 3957 3958 if only: 3959 this.set("only", only) 3960 3961 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3962 self._match_text_seq("*") 3963 3964 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3965 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3966 this.set("partition", self._parse_partition()) 3967 3968 if schema: 3969 return self._parse_schema(this=this) 3970 3971 version = self._parse_version() 3972 3973 if version: 3974 this.set("version", version) 3975 3976 if self.dialect.ALIAS_POST_TABLESAMPLE: 3977 this.set("sample", self._parse_table_sample()) 3978 3979 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3980 if alias: 3981 this.set("alias", alias) 3982 3983 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3984 return self.expression( 3985 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3986 ) 3987 3988 this.set("hints", self._parse_table_hints()) 3989 3990 if not this.args.get("pivots"): 3991 this.set("pivots", self._parse_pivots()) 3992 3993 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3994 this.set("sample", self._parse_table_sample()) 3995 3996 if joins: 3997 for join in self._parse_joins(): 3998 this.append("joins", join) 3999 4000 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4001 this.set("ordinality", True) 4002 this.set("alias", self._parse_table_alias()) 4003 4004 return this 4005 4006 def _parse_version(self) -> t.Optional[exp.Version]: 4007 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4008 this = "TIMESTAMP" 4009 elif self._match(TokenType.VERSION_SNAPSHOT): 4010 this = "VERSION" 4011 else: 4012 return None 4013 4014 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4015 kind = self._prev.text.upper() 4016 start = self._parse_bitwise() 4017 self._match_texts(("TO", "AND")) 4018 end = self._parse_bitwise() 4019 expression: t.Optional[exp.Expression] = self.expression( 4020 exp.Tuple, expressions=[start, end] 4021 ) 4022 elif self._match_text_seq("CONTAINED", "IN"): 4023 kind = "CONTAINED IN" 4024 expression = self.expression( 4025 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4026 ) 4027 elif self._match(TokenType.ALL): 4028 kind = "ALL" 4029 expression = None 4030 else: 4031 self._match_text_seq("AS", "OF") 4032 kind = "AS OF" 4033 expression = self._parse_type() 4034 4035 return 
self.expression(exp.Version, this=this, expression=expression, kind=kind) 4036 4037 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4038 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4039 index = self._index 4040 historical_data = None 4041 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4042 this = self._prev.text.upper() 4043 kind = ( 4044 self._match(TokenType.L_PAREN) 4045 and self._match_texts(self.HISTORICAL_DATA_KIND) 4046 and self._prev.text.upper() 4047 ) 4048 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4049 4050 if expression: 4051 self._match_r_paren() 4052 historical_data = self.expression( 4053 exp.HistoricalData, this=this, kind=kind, expression=expression 4054 ) 4055 else: 4056 self._retreat(index) 4057 4058 return historical_data 4059 4060 def _parse_changes(self) -> t.Optional[exp.Changes]: 4061 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4062 return None 4063 4064 information = self._parse_var(any_token=True) 4065 self._match_r_paren() 4066 4067 return self.expression( 4068 exp.Changes, 4069 information=information, 4070 at_before=self._parse_historical_data(), 4071 end=self._parse_historical_data(), 4072 ) 4073 4074 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4075 if not self._match(TokenType.UNNEST): 4076 return None 4077 4078 expressions = self._parse_wrapped_csv(self._parse_equality) 4079 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4080 4081 alias = self._parse_table_alias() if with_alias else None 4082 4083 if alias: 4084 if self.dialect.UNNEST_COLUMN_ONLY: 4085 if alias.args.get("columns"): 4086 self.raise_error("Unexpected extra column alias in unnest.") 4087 4088 alias.set("columns", [alias.this]) 4089 alias.set("this", None) 4090 4091 columns = alias.args.get("columns") or [] 4092 if offset and len(expressions) < len(columns): 4093 offset = columns.pop() 4094 4095 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4096 self._match(TokenType.ALIAS) 4097 offset = self._parse_id_var( 4098 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4099 ) or exp.to_identifier("offset") 4100 4101 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4102 4103 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4104 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4105 if not is_derived and not ( 4106 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4107 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4108 ): 4109 return None 4110 4111 expressions = self._parse_csv(self._parse_value) 4112 alias = self._parse_table_alias() 4113 4114 if is_derived: 4115 self._match_r_paren() 4116 4117 return self.expression( 4118 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4119 ) 4120 4121 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4122 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4123 as_modifier and self._match_text_seq("USING", "SAMPLE") 4124 ): 4125 return None 4126 4127 bucket_numerator = None 4128 bucket_denominator = None 4129 bucket_field = None 4130 percent = None 4131 size = None 4132 seed = None 4133 4134 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4135 matched_l_paren = self._match(TokenType.L_PAREN) 4136 4137 if self.TABLESAMPLE_CSV: 4138 num = None 4139 expressions = self._parse_csv(self._parse_primary) 4140 else: 
4141 expressions = None 4142 num = ( 4143 self._parse_factor() 4144 if self._match(TokenType.NUMBER, advance=False) 4145 else self._parse_primary() or self._parse_placeholder() 4146 ) 4147 4148 if self._match_text_seq("BUCKET"): 4149 bucket_numerator = self._parse_number() 4150 self._match_text_seq("OUT", "OF") 4151 bucket_denominator = self._parse_number() 4152 self._match(TokenType.ON) 4153 bucket_field = self._parse_field() 4154 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4155 percent = num 4156 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4157 size = num 4158 else: 4159 percent = num 4160 4161 if matched_l_paren: 4162 self._match_r_paren() 4163 4164 if self._match(TokenType.L_PAREN): 4165 method = self._parse_var(upper=True) 4166 seed = self._match(TokenType.COMMA) and self._parse_number() 4167 self._match_r_paren() 4168 elif self._match_texts(("SEED", "REPEATABLE")): 4169 seed = self._parse_wrapped(self._parse_number) 4170 4171 if not method and self.DEFAULT_SAMPLING_METHOD: 4172 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4173 4174 return self.expression( 4175 exp.TableSample, 4176 expressions=expressions, 4177 method=method, 4178 bucket_numerator=bucket_numerator, 4179 bucket_denominator=bucket_denominator, 4180 bucket_field=bucket_field, 4181 percent=percent, 4182 size=size, 4183 seed=seed, 4184 ) 4185 4186 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4187 return list(iter(self._parse_pivot, None)) or None 4188 4189 def _parse_joins(self) -> t.Iterator[exp.Join]: 4190 return iter(self._parse_join, None) 4191 4192 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4193 if not self._match(TokenType.INTO): 4194 return None 4195 4196 return self.expression( 4197 exp.UnpivotColumns, 4198 this=self._match_text_seq("NAME") and self._parse_column(), 4199 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4200 ) 4201 4202 # https://duckdb.org/docs/sql/statements/pivot 4203 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4204 def _parse_on() -> t.Optional[exp.Expression]: 4205 this = self._parse_bitwise() 4206 4207 if self._match(TokenType.IN): 4208 # PIVOT ... ON col IN (row_val1, row_val2) 4209 return self._parse_in(this) 4210 if self._match(TokenType.ALIAS, advance=False): 4211 # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4212 return self._parse_alias(this) 4213 4214 return this 4215 4216 this = self._parse_table() 4217 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4218 into = self._parse_unpivot_columns() 4219 using = self._match(TokenType.USING) and self._parse_csv( 4220 lambda: self._parse_alias(self._parse_function()) 4221 ) 4222 group = self._parse_group() 4223 4224 return self.expression( 4225 exp.Pivot, 4226 this=this, 4227 expressions=expressions, 4228 using=using, 4229 group=group, 4230 unpivot=is_unpivot, 4231 into=into, 4232 ) 4233 4234 def _parse_pivot_in(self) -> exp.In: 4235 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4236 this = self._parse_select_or_expression() 4237 4238 self._match(TokenType.ALIAS) 4239 alias = self._parse_bitwise() 4240 if alias: 4241 if isinstance(alias, exp.Column) and not alias.db: 4242 alias = alias.this 4243 return self.expression(exp.PivotAlias, this=this, alias=alias) 4244 4245 return this 4246 4247 value = self._parse_column() 4248 4249 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4250 self.raise_error("Expecting IN (") 4251 4252 if self._match(TokenType.ANY): 4253 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4254 else: 4255 exprs = self._parse_csv(_parse_aliased_expression) 4256 4257 self._match_r_paren() 4258 return self.expression(exp.In, this=value, expressions=exprs) 4259 4260 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4261 index = self._index 4262 include_nulls = None 4263 4264 if self._match(TokenType.PIVOT): 4265 unpivot = False 4266 elif self._match(TokenType.UNPIVOT): 4267 unpivot = True 4268 4269 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4270 if self._match_text_seq("INCLUDE", "NULLS"): 4271 include_nulls = True 4272 elif self._match_text_seq("EXCLUDE", "NULLS"): 4273 include_nulls = False 4274 else: 4275 return None 4276 4277 expressions = [] 4278 4279 if not self._match(TokenType.L_PAREN): 4280 self._retreat(index) 4281 return None 4282 4283 if unpivot: 4284 expressions = self._parse_csv(self._parse_column) 4285 else: 4286 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4287 4288 if not expressions: 4289 self.raise_error("Failed to parse PIVOT's aggregation list") 4290 4291 if not self._match(TokenType.FOR): 4292 self.raise_error("Expecting FOR") 4293 4294 fields = [] 4295 while True: 4296 field = self._try_parse(self._parse_pivot_in) 4297 if not field: 4298 break 4299 fields.append(field) 4300 4301 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4302 self._parse_bitwise 4303 ) 4304 4305 group = self._parse_group() 4306 4307 self._match_r_paren() 4308 4309 pivot = self.expression( 4310 exp.Pivot, 4311 expressions=expressions, 4312 fields=fields, 4313 unpivot=unpivot, 4314 include_nulls=include_nulls, 4315 default_on_null=default_on_null, 4316 group=group, 4317 ) 4318 4319 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4320 pivot.set("alias", self._parse_table_alias()) 4321 4322 if not unpivot: 4323 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4324 4325 columns: t.List[exp.Expression] = [] 4326 all_fields = [] 4327 for pivot_field in pivot.fields: 4328 pivot_field_expressions = pivot_field.expressions 4329 4330 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
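# For example, Snowflake's PIVOT(... FOR col IN (ANY ORDER BY col)) only resolves its
# output columns at query time, so no aliases can be precomputed for it.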
4331 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4332 continue 4333 4334 all_fields.append( 4335 [ 4336 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4337 for fld in pivot_field_expressions 4338 ] 4339 ) 4340 4341 if all_fields: 4342 if names: 4343 all_fields.append(names) 4344 4345 # Generate all possible combinations of the pivot columns 4346 # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4347 # generates the product of [[2000, 2010], ['NL', 'US'], ['total']] 4348 for fld_parts_tuple in itertools.product(*all_fields): 4349 fld_parts = list(fld_parts_tuple) 4350 4351 if names and self.PREFIXED_PIVOT_COLUMNS: 4352 # Move the "name" to the front of the list 4353 fld_parts.insert(0, fld_parts.pop(-1)) 4354 4355 columns.append(exp.to_identifier("_".join(fld_parts))) 4356 4357 pivot.set("columns", columns) 4358 4359 return pivot 4360 4361 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4362 return [agg.alias for agg in aggregations if agg.alias] 4363 4364 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4365 if not skip_where_token and not self._match(TokenType.PREWHERE): 4366 return None 4367 4368 return self.expression( 4369 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4370 ) 4371 4372 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4373 if not skip_where_token and not self._match(TokenType.WHERE): 4374 return None 4375 4376 return self.expression( 4377 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4378 ) 4379 4380 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4381 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4382 return None 4383 4384 elements: t.Dict[str, t.Any] = defaultdict(list) 4385 4386 if self._match(TokenType.ALL): 4387 elements["all"] = True 4388 elif self._match(TokenType.DISTINCT): 4389 elements["all"] = False 4390 4391 while True: 4392 index = self._index 4393 4394 elements["expressions"].extend( 4395 self._parse_csv( 4396 lambda: None 4397 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4398 else self._parse_assignment() 4399 ) 4400 ) 4401 4402 before_with_index = self._index 4403 with_prefix = self._match(TokenType.WITH) 4404 4405 if self._match(TokenType.ROLLUP): 4406 elements["rollup"].append( 4407 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4408 ) 4409 elif self._match(TokenType.CUBE): 4410 elements["cube"].append( 4411 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4412 ) 4413 elif self._match(TokenType.GROUPING_SETS): 4414 elements["grouping_sets"].append( 4415 self.expression( 4416 exp.GroupingSets, 4417 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4418 ) 4419 ) 4420 elif self._match_text_seq("TOTALS"): 4421 elements["totals"] = True # type: ignore 4422 4423 if before_with_index <= self._index <= before_with_index + 1: 4424 self._retreat(before_with_index) 4425 break 4426 4427 if index == self._index: 4428 break 4429 4430 return self.expression(exp.Group, **elements) # type: ignore 4431 4432 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4433 return self.expression( 4434 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4435 ) 4436 4437 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4438 if
self._match(TokenType.L_PAREN): 4439 grouping_set = self._parse_csv(self._parse_column) 4440 self._match_r_paren() 4441 return self.expression(exp.Tuple, expressions=grouping_set) 4442 4443 return self._parse_column() 4444 4445 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4446 if not skip_having_token and not self._match(TokenType.HAVING): 4447 return None 4448 return self.expression(exp.Having, this=self._parse_assignment()) 4449 4450 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4451 if not self._match(TokenType.QUALIFY): 4452 return None 4453 return self.expression(exp.Qualify, this=self._parse_assignment()) 4454 4455 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4456 if skip_start_token: 4457 start = None 4458 elif self._match(TokenType.START_WITH): 4459 start = self._parse_assignment() 4460 else: 4461 return None 4462 4463 self._match(TokenType.CONNECT_BY) 4464 nocycle = self._match_text_seq("NOCYCLE") 4465 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4466 exp.Prior, this=self._parse_bitwise() 4467 ) 4468 connect = self._parse_assignment() 4469 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4470 4471 if not start and self._match(TokenType.START_WITH): 4472 start = self._parse_assignment() 4473 4474 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4475 4476 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4477 this = self._parse_id_var(any_token=True) 4478 if self._match(TokenType.ALIAS): 4479 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4480 return this 4481 4482 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4483 if self._match_text_seq("INTERPOLATE"): 4484 return self._parse_wrapped_csv(self._parse_name_as_expression) 4485 return None 4486 4487 def _parse_order( 4488 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4489 ) -> t.Optional[exp.Expression]: 4490 siblings = None 4491 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4492 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4493 return this 4494 4495 siblings = True 4496 4497 return self.expression( 4498 exp.Order, 4499 this=this, 4500 expressions=self._parse_csv(self._parse_ordered), 4501 siblings=siblings, 4502 ) 4503 4504 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4505 if not self._match(token): 4506 return None 4507 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4508 4509 def _parse_ordered( 4510 self, parse_method: t.Optional[t.Callable] = None 4511 ) -> t.Optional[exp.Ordered]: 4512 this = parse_method() if parse_method else self._parse_assignment() 4513 if not this: 4514 return None 4515 4516 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4517 this = exp.var("ALL") 4518 4519 asc = self._match(TokenType.ASC) 4520 desc = self._match(TokenType.DESC) or (asc and False) 4521 4522 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4523 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4524 4525 nulls_first = is_nulls_first or False 4526 explicitly_null_ordered = is_nulls_first or is_nulls_last 4527 4528 if ( 4529 not explicitly_null_ordered 4530 and ( 4531 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4532 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4533 ) 4534 and self.dialect.NULL_ORDERING != "nulls_are_last" 4535 ): 4536 
nulls_first = True 4537 4538 if self._match_text_seq("WITH", "FILL"): 4539 with_fill = self.expression( 4540 exp.WithFill, 4541 **{ # type: ignore 4542 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4543 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4544 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4545 "interpolate": self._parse_interpolate(), 4546 }, 4547 ) 4548 else: 4549 with_fill = None 4550 4551 return self.expression( 4552 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4553 ) 4554 4555 def _parse_limit_options(self) -> exp.LimitOptions: 4556 percent = self._match(TokenType.PERCENT) 4557 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4558 self._match_text_seq("ONLY") 4559 with_ties = self._match_text_seq("WITH", "TIES") 4560 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4561 4562 def _parse_limit( 4563 self, 4564 this: t.Optional[exp.Expression] = None, 4565 top: bool = False, 4566 skip_limit_token: bool = False, 4567 ) -> t.Optional[exp.Expression]: 4568 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4569 comments = self._prev_comments 4570 if top: 4571 limit_paren = self._match(TokenType.L_PAREN) 4572 expression = self._parse_term() if limit_paren else self._parse_number() 4573 4574 if limit_paren: 4575 self._match_r_paren() 4576 4577 limit_options = self._parse_limit_options() 4578 else: 4579 limit_options = None 4580 expression = self._parse_term() 4581 4582 if self._match(TokenType.COMMA): 4583 offset = expression 4584 expression = self._parse_term() 4585 else: 4586 offset = None 4587 4588 limit_exp = self.expression( 4589 exp.Limit, 4590 this=this, 4591 expression=expression, 4592 offset=offset, 4593 comments=comments, 4594 limit_options=limit_options, 4595 expressions=self._parse_limit_by(), 4596 ) 4597 4598 return limit_exp 4599 4600 if self._match(TokenType.FETCH): 4601 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4602 direction = self._prev.text.upper() if direction else "FIRST" 4603 4604 count = self._parse_field(tokens=self.FETCH_TOKENS) 4605 4606 return self.expression( 4607 exp.Fetch, 4608 direction=direction, 4609 count=count, 4610 limit_options=self._parse_limit_options(), 4611 ) 4612 4613 return this 4614 4615 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4616 if not self._match(TokenType.OFFSET): 4617 return this 4618 4619 count = self._parse_term() 4620 self._match_set((TokenType.ROW, TokenType.ROWS)) 4621 4622 return self.expression( 4623 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4624 ) 4625 4626 def _can_parse_limit_or_offset(self) -> bool: 4627 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4628 return False 4629 4630 index = self._index 4631 result = bool( 4632 self._try_parse(self._parse_limit, retreat=True) 4633 or self._try_parse(self._parse_offset, retreat=True) 4634 ) 4635 self._retreat(index) 4636 return result 4637 4638 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4639 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4640 4641 def _parse_locks(self) -> t.List[exp.Lock]: 4642 locks = [] 4643 while True: 4644 if self._match_text_seq("FOR", "UPDATE"): 4645 update = True 4646 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4647 "LOCK", "IN", "SHARE", "MODE" 4648 ): 4649 update = False 4650 else: 4651 break 
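# A locking strength was matched, so parse its optional OF <tables> list and wait
# behavior (NOWAIT / WAIT <n> / SKIP LOCKED) next. Postgres allows several locking
# clauses in one statement, e.g. FOR UPDATE OF t1 FOR SHARE OF t2, hence the
# enclosing loop.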
4652 4653 expressions = None 4654 if self._match_text_seq("OF"): 4655 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4656 4657 wait: t.Optional[bool | exp.Expression] = None 4658 if self._match_text_seq("NOWAIT"): 4659 wait = True 4660 elif self._match_text_seq("WAIT"): 4661 wait = self._parse_primary() 4662 elif self._match_text_seq("SKIP", "LOCKED"): 4663 wait = False 4664 4665 locks.append( 4666 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4667 ) 4668 4669 return locks 4670 4671 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4672 start = self._index 4673 _, side_token, kind_token = self._parse_join_parts() 4674 4675 side = side_token.text if side_token else None 4676 kind = kind_token.text if kind_token else None 4677 4678 if not self._match_set(self.SET_OPERATIONS): 4679 self._retreat(start) 4680 return None 4681 4682 token_type = self._prev.token_type 4683 4684 if token_type == TokenType.UNION: 4685 operation: t.Type[exp.SetOperation] = exp.Union 4686 elif token_type == TokenType.EXCEPT: 4687 operation = exp.Except 4688 else: 4689 operation = exp.Intersect 4690 4691 comments = self._prev.comments 4692 4693 if self._match(TokenType.DISTINCT): 4694 distinct: t.Optional[bool] = True 4695 elif self._match(TokenType.ALL): 4696 distinct = False 4697 else: 4698 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4699 if distinct is None: 4700 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4701 4702 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4703 "STRICT", "CORRESPONDING" 4704 ) 4705 if self._match_text_seq("CORRESPONDING"): 4706 by_name = True 4707 if not side and not kind: 4708 kind = "INNER" 4709 4710 on_column_list = None 4711 if by_name and self._match_texts(("ON", "BY")): 4712 on_column_list = self._parse_wrapped_csv(self._parse_column) 4713 4714 expression = self._parse_select(nested=True, parse_set_operation=False) 4715 4716 return self.expression( 4717 operation, 4718 comments=comments, 4719 this=this, 4720 distinct=distinct, 4721 by_name=by_name, 4722 expression=expression, 4723 side=side, 4724 kind=kind, 4725 on=on_column_list, 4726 ) 4727 4728 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4729 while this: 4730 setop = self.parse_set_operation(this) 4731 if not setop: 4732 break 4733 this = setop 4734 4735 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4736 expression = this.expression 4737 4738 if expression: 4739 for arg in self.SET_OP_MODIFIERS: 4740 expr = expression.args.get(arg) 4741 if expr: 4742 this.set(arg, expr.pop()) 4743 4744 return this 4745 4746 def _parse_expression(self) -> t.Optional[exp.Expression]: 4747 return self._parse_alias(self._parse_assignment()) 4748 4749 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4750 this = self._parse_disjunction() 4751 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4752 # This allows us to parse <non-identifier token> := <expr> 4753 this = exp.column( 4754 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4755 ) 4756 4757 while self._match_set(self.ASSIGNMENT): 4758 if isinstance(this, exp.Column) and len(this.parts) == 1: 4759 this = this.this 4760 4761 this = self.expression( 4762 self.ASSIGNMENT[self._prev.token_type], 4763 this=this, 4764 comments=self._prev_comments, 4765 expression=self._parse_assignment(), 4766 ) 4767 4768 
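# Note that the recursive _parse_assignment call above makes assignment operators
# right-associative, i.e. a := b := c parses as a := (b := c).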
return this 4769 4770 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4771 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4772 4773 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4774 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4775 4776 def _parse_equality(self) -> t.Optional[exp.Expression]: 4777 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4778 4779 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4780 return self._parse_tokens(self._parse_range, self.COMPARISON) 4781 4782 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4783 this = this or self._parse_bitwise() 4784 negate = self._match(TokenType.NOT) 4785 4786 if self._match_set(self.RANGE_PARSERS): 4787 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4788 if not expression: 4789 return this 4790 4791 this = expression 4792 elif self._match(TokenType.ISNULL): 4793 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4794 4795 # Postgres supports ISNULL and NOTNULL for conditions. 4796 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4797 if self._match(TokenType.NOTNULL): 4798 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4799 this = self.expression(exp.Not, this=this) 4800 4801 if negate: 4802 this = self._negate_range(this) 4803 4804 if self._match(TokenType.IS): 4805 this = self._parse_is(this) 4806 4807 return this 4808 4809 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4810 if not this: 4811 return this 4812 4813 return self.expression(exp.Not, this=this) 4814 4815 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4816 index = self._index - 1 4817 negate = self._match(TokenType.NOT) 4818 4819 if self._match_text_seq("DISTINCT", "FROM"): 4820 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4821 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4822 4823 if self._match(TokenType.JSON): 4824 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4825 4826 if self._match_text_seq("WITH"): 4827 _with = True 4828 elif self._match_text_seq("WITHOUT"): 4829 _with = False 4830 else: 4831 _with = None 4832 4833 unique = self._match(TokenType.UNIQUE) 4834 self._match_text_seq("KEYS") 4835 expression: t.Optional[exp.Expression] = self.expression( 4836 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4837 ) 4838 else: 4839 expression = self._parse_primary() or self._parse_null() 4840 if not expression: 4841 self._retreat(index) 4842 return None 4843 4844 this = self.expression(exp.Is, this=this, expression=expression) 4845 return self.expression(exp.Not, this=this) if negate else this 4846 4847 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4848 unnest = self._parse_unnest(with_alias=False) 4849 if unnest: 4850 this = self.expression(exp.In, this=this, unnest=unnest) 4851 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4852 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4853 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4854 4855 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4856 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4857 else: 4858 this = self.expression(exp.In, this=this, expressions=expressions) 4859 4860 if 
matched_l_paren: 4861 self._match_r_paren(this) 4862 elif not self._match(TokenType.R_BRACKET, expression=this): 4863 self.raise_error("Expecting ]") 4864 else: 4865 this = self.expression(exp.In, this=this, field=self._parse_column()) 4866 4867 return this 4868 4869 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4870 low = self._parse_bitwise() 4871 self._match(TokenType.AND) 4872 high = self._parse_bitwise() 4873 return self.expression(exp.Between, this=this, low=low, high=high) 4874 4875 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4876 if not self._match(TokenType.ESCAPE): 4877 return this 4878 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4879 4880 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4881 index = self._index 4882 4883 if not self._match(TokenType.INTERVAL) and match_interval: 4884 return None 4885 4886 if self._match(TokenType.STRING, advance=False): 4887 this = self._parse_primary() 4888 else: 4889 this = self._parse_term() 4890 4891 if not this or ( 4892 isinstance(this, exp.Column) 4893 and not this.table 4894 and not this.this.quoted 4895 and this.name.upper() == "IS" 4896 ): 4897 self._retreat(index) 4898 return None 4899 4900 unit = self._parse_function() or ( 4901 not self._match(TokenType.ALIAS, advance=False) 4902 and self._parse_var(any_token=True, upper=True) 4903 ) 4904 4905 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4906 # each INTERVAL expression into this canonical form so it's easy to transpile 4907 if this and this.is_number: 4908 this = exp.Literal.string(this.to_py()) 4909 elif this and this.is_string: 4910 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4911 if parts and unit: 4912 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4913 unit = None 4914 self._retreat(self._index - 1) 4915 4916 if len(parts) == 1: 4917 this = exp.Literal.string(parts[0][0]) 4918 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4919 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4920 unit = self.expression( 4921 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4922 ) 4923 4924 interval = self.expression(exp.Interval, this=this, unit=unit) 4925 4926 index = self._index 4927 self._match(TokenType.PLUS) 4928 4929 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4930 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4931 return self.expression( 4932 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4933 ) 4934 4935 self._retreat(index) 4936 return interval 4937 4938 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4939 this = self._parse_term() 4940 4941 while True: 4942 if self._match_set(self.BITWISE): 4943 this = self.expression( 4944 self.BITWISE[self._prev.token_type], 4945 this=this, 4946 expression=self._parse_term(), 4947 ) 4948 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4949 this = self.expression( 4950 exp.DPipe, 4951 this=this, 4952 expression=self._parse_term(), 4953 safe=not self.dialect.STRICT_STRING_CONCAT, 4954 ) 4955 elif self._match(TokenType.DQMARK): 4956 this = self.expression( 4957 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4958 ) 4959 elif self._match_pair(TokenType.LT, TokenType.LT): 4960 this = self.expression( 4961 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4962 ) 4963 elif self._match_pair(TokenType.GT, TokenType.GT): 4964 this = self.expression( 4965 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4966 ) 4967 else: 4968 break 4969 4970 return this 4971 4972 def _parse_term(self) -> t.Optional[exp.Expression]: 4973 this = self._parse_factor() 4974 4975 while self._match_set(self.TERM): 4976 klass = self.TERM[self._prev.token_type] 4977 comments = self._prev_comments 4978 expression = self._parse_factor() 4979 4980 this = self.expression(klass, this=this, comments=comments, expression=expression) 4981 4982 if isinstance(this, exp.Collate): 4983 expr = this.expression 4984 4985 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4986 # fallback to Identifier / Var 4987 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4988 ident = expr.this 4989 if isinstance(ident, exp.Identifier): 4990 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4991 4992 return this 4993 4994 def _parse_factor(self) -> t.Optional[exp.Expression]: 4995 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4996 this = parse_method() 4997 4998 while self._match_set(self.FACTOR): 4999 klass = self.FACTOR[self._prev.token_type] 5000 comments = self._prev_comments 5001 expression = parse_method() 5002 5003 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5004 self._retreat(self._index - 1) 5005 return this 5006 5007 this = self.expression(klass, this=this, comments=comments, expression=expression) 5008 5009 if isinstance(this, exp.Div): 5010 this.args["typed"] = self.dialect.TYPED_DIVISION 5011 this.args["safe"] = self.dialect.SAFE_DIVISION 5012 5013 return this 5014 5015 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5016 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5017 5018 def _parse_unary(self) -> t.Optional[exp.Expression]: 5019 if self._match_set(self.UNARY_PARSERS): 5020 return self.UNARY_PARSERS[self._prev.token_type](self) 5021 return self._parse_at_time_zone(self._parse_type()) 5022 5023 def _parse_type( 5024 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5025 ) -> t.Optional[exp.Expression]: 5026 interval = parse_interval and self._parse_interval() 5027 if interval: 5028 return interval 5029 5030 index = self._index 5031 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5032 
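# A type keyword followed by a literal is parsed as a cast below, e.g. DATE '2025-01-01'
# becomes CAST('2025-01-01' AS DATE), i.e. an exp.Cast node.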
5033 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5034 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5035 if isinstance(data_type, exp.Cast): 5036 # This constructor can contain ops directly after it, for instance struct unnesting: 5037 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5038 return self._parse_column_ops(data_type) 5039 5040 if data_type: 5041 index2 = self._index 5042 this = self._parse_primary() 5043 5044 if isinstance(this, exp.Literal): 5045 this = self._parse_column_ops(this) 5046 5047 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5048 if parser: 5049 return parser(self, this, data_type) 5050 5051 return self.expression(exp.Cast, this=this, to=data_type) 5052 5053 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5054 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5055 # 5056 # If the index difference here is greater than 1, that means the parser itself must have 5057 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5058 # 5059 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5060 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5061 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5062 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5063 # 5064 # In these cases, we don't really want to return the converted type, but instead retreat 5065 # and try to parse a Column or Identifier in the section below. 5066 if data_type.expressions and index2 - index > 1: 5067 self._retreat(index2) 5068 return self._parse_column_ops(data_type) 5069 5070 self._retreat(index) 5071 5072 if fallback_to_identifier: 5073 return self._parse_id_var() 5074 5075 this = self._parse_column() 5076 return this and self._parse_column_ops(this) 5077 5078 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5079 this = self._parse_type() 5080 if not this: 5081 return None 5082 5083 if isinstance(this, exp.Column) and not this.table: 5084 this = exp.var(this.name.upper()) 5085 5086 return self.expression( 5087 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5088 ) 5089 5090 def _parse_types( 5091 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5092 ) -> t.Optional[exp.Expression]: 5093 index = self._index 5094 5095 this: t.Optional[exp.Expression] = None 5096 prefix = self._match_text_seq("SYSUDTLIB", ".") 5097 5098 if not self._match_set(self.TYPE_TOKENS): 5099 identifier = allow_identifiers and self._parse_id_var( 5100 any_token=False, tokens=(TokenType.VAR,) 5101 ) 5102 if isinstance(identifier, exp.Identifier): 5103 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5104 5105 if len(tokens) != 1: 5106 self.raise_error("Unexpected identifier", self._prev) 5107 5108 if tokens[0].token_type in self.TYPE_TOKENS: 5109 self._prev = tokens[0] 5110 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5111 type_name = identifier.name 5112 5113 while self._match(TokenType.DOT): 5114 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5115 5116 this = exp.DataType.build(type_name, udt=True) 5117 else: 5118 self._retreat(self._index - 1) 5119 return None 5120 else: 5121 return None 5122 5123 type_token = self._prev.token_type 5124
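# From here on, type_token drives the specialized cases: pseudo-types, object
# identifiers, Materialize's MAP[<key> => <value>] syntax (matched below as
# '[' key '=>' value ']'), nested and struct types, and so on.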
5125 if type_token == TokenType.PSEUDO_TYPE: 5126 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5127 5128 if type_token == TokenType.OBJECT_IDENTIFIER: 5129 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5130 5131 # https://materialize.com/docs/sql/types/map/ 5132 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5133 key_type = self._parse_types( 5134 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5135 ) 5136 if not self._match(TokenType.FARROW): 5137 self._retreat(index) 5138 return None 5139 5140 value_type = self._parse_types( 5141 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5142 ) 5143 if not self._match(TokenType.R_BRACKET): 5144 self._retreat(index) 5145 return None 5146 5147 return exp.DataType( 5148 this=exp.DataType.Type.MAP, 5149 expressions=[key_type, value_type], 5150 nested=True, 5151 prefix=prefix, 5152 ) 5153 5154 nested = type_token in self.NESTED_TYPE_TOKENS 5155 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5156 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5157 expressions = None 5158 maybe_func = False 5159 5160 if self._match(TokenType.L_PAREN): 5161 if is_struct: 5162 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5163 elif nested: 5164 expressions = self._parse_csv( 5165 lambda: self._parse_types( 5166 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5167 ) 5168 ) 5169 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5170 this = expressions[0] 5171 this.set("nullable", True) 5172 self._match_r_paren() 5173 return this 5174 elif type_token in self.ENUM_TYPE_TOKENS: 5175 expressions = self._parse_csv(self._parse_equality) 5176 elif is_aggregate: 5177 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5178 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5179 ) 5180 if not func_or_ident: 5181 return None 5182 expressions = [func_or_ident] 5183 if self._match(TokenType.COMMA): 5184 expressions.extend( 5185 self._parse_csv( 5186 lambda: self._parse_types( 5187 check_func=check_func, 5188 schema=schema, 5189 allow_identifiers=allow_identifiers, 5190 ) 5191 ) 5192 ) 5193 else: 5194 expressions = self._parse_csv(self._parse_type_size) 5195 5196 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5197 if type_token == TokenType.VECTOR and len(expressions) == 2: 5198 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5199 5200 if not expressions or not self._match(TokenType.R_PAREN): 5201 self._retreat(index) 5202 return None 5203 5204 maybe_func = True 5205 5206 values: t.Optional[t.List[exp.Expression]] = None 5207 5208 if nested and self._match(TokenType.LT): 5209 if is_struct: 5210 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5211 else: 5212 expressions = self._parse_csv( 5213 lambda: self._parse_types( 5214 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5215 ) 5216 ) 5217 5218 if not self._match(TokenType.GT): 5219 self.raise_error("Expecting >") 5220 5221 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5222 values = self._parse_csv(self._parse_assignment) 5223 if not values and is_struct: 5224 values = None 5225 self._retreat(self._index - 1) 5226 else: 5227 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5228 5229 if type_token in self.TIMESTAMPS: 5230 if self._match_text_seq("WITH", "TIME", 
"ZONE"): 5231 maybe_func = False 5232 tz_type = ( 5233 exp.DataType.Type.TIMETZ 5234 if type_token in self.TIMES 5235 else exp.DataType.Type.TIMESTAMPTZ 5236 ) 5237 this = exp.DataType(this=tz_type, expressions=expressions) 5238 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5239 maybe_func = False 5240 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5241 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5242 maybe_func = False 5243 elif type_token == TokenType.INTERVAL: 5244 unit = self._parse_var(upper=True) 5245 if unit: 5246 if self._match_text_seq("TO"): 5247 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5248 5249 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5250 else: 5251 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5252 elif type_token == TokenType.VOID: 5253 this = exp.DataType(this=exp.DataType.Type.NULL) 5254 5255 if maybe_func and check_func: 5256 index2 = self._index 5257 peek = self._parse_string() 5258 5259 if not peek: 5260 self._retreat(index) 5261 return None 5262 5263 self._retreat(index2) 5264 5265 if not this: 5266 if self._match_text_seq("UNSIGNED"): 5267 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5268 if not unsigned_type_token: 5269 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5270 5271 type_token = unsigned_type_token or type_token 5272 5273 this = exp.DataType( 5274 this=exp.DataType.Type[type_token.value], 5275 expressions=expressions, 5276 nested=nested, 5277 prefix=prefix, 5278 ) 5279 5280 # Empty arrays/structs are allowed 5281 if values is not None: 5282 cls = exp.Struct if is_struct else exp.Array 5283 this = exp.cast(cls(expressions=values), this, copy=False) 5284 5285 elif expressions: 5286 this.set("expressions", expressions) 5287 5288 # https://materialize.com/docs/sql/types/list/#type-name 5289 while self._match(TokenType.LIST): 5290 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5291 5292 index = self._index 5293 5294 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5295 matched_array = self._match(TokenType.ARRAY) 5296 5297 while self._curr: 5298 datatype_token = self._prev.token_type 5299 matched_l_bracket = self._match(TokenType.L_BRACKET) 5300 5301 if (not matched_l_bracket and not matched_array) or ( 5302 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5303 ): 5304 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5305 # not to be confused with the fixed size array parsing 5306 break 5307 5308 matched_array = False 5309 values = self._parse_csv(self._parse_assignment) or None 5310 if ( 5311 values 5312 and not schema 5313 and ( 5314 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5315 ) 5316 ): 5317 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5318 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5319 self._retreat(index) 5320 break 5321 5322 this = exp.DataType( 5323 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5324 ) 5325 self._match(TokenType.R_BRACKET) 5326 5327 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5328 converter = self.TYPE_CONVERTERS.get(this.this) 5329 if converter: 5330 this = converter(t.cast(exp.DataType, this)) 5331 5332 return this 5333 5334 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5335 index = self._index 5336 5337 if ( 5338 self._curr 5339 and self._next 5340 and self._curr.token_type in self.TYPE_TOKENS 5341 and self._next.token_type in self.TYPE_TOKENS 5342 ): 5343 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5344 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5345 this = self._parse_id_var() 5346 else: 5347 this = ( 5348 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5349 or self._parse_id_var() 5350 ) 5351 5352 self._match(TokenType.COLON) 5353 5354 if ( 5355 type_required 5356 and not isinstance(this, exp.DataType) 5357 and not self._match_set(self.TYPE_TOKENS, advance=False) 5358 ): 5359 self._retreat(index) 5360 return self._parse_types() 5361 5362 return self._parse_column_def(this) 5363 5364 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5365 if not self._match_text_seq("AT", "TIME", "ZONE"): 5366 return this 5367 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5368 5369 def _parse_column(self) -> t.Optional[exp.Expression]: 5370 this = self._parse_column_reference() 5371 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5372 5373 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5374 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5375 5376 return column 5377 5378 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5379 this = self._parse_field() 5380 if ( 5381 not this 5382 and self._match(TokenType.VALUES, advance=False) 5383 and self.VALUES_FOLLOWED_BY_PAREN 5384 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5385 ): 5386 this = self._parse_id_var() 5387 5388 if isinstance(this, exp.Identifier): 5389 # We bubble up comments from the Identifier to the Column 5390 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5391 5392 return this 5393 5394 def _parse_colon_as_variant_extract( 5395 self, this: t.Optional[exp.Expression] 5396 ) -> t.Optional[exp.Expression]: 5397 casts = [] 5398 json_path = [] 5399 escape = None 5400 5401 while self._match(TokenType.COLON): 5402 start_index = self._index 5403 5404 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5405 path = self._parse_column_ops( 5406 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5407 ) 5408 5409 # The cast :: operator has a lower precedence than the extraction operator :, so 5410 # we rearrange the AST appropriately to avoid casting the JSON path 5411 while isinstance(path, exp.Cast): 5412 casts.append(path.to) 5413 path = path.this 5414 5415 if casts: 5416 dcolon_offset = next( 5417 i 5418 for i, t in enumerate(self._tokens[start_index:]) 5419 if t.token_type == TokenType.DCOLON 
5420 ) 5421 end_token = self._tokens[start_index + dcolon_offset - 1] 5422 else: 5423 end_token = self._prev 5424 5425 if path: 5426 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5427 # it'll roundtrip to a string literal in GET_PATH 5428 if isinstance(path, exp.Identifier) and path.quoted: 5429 escape = True 5430 5431 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5432 5433 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5434 # Databricks transforms it back to the colon/dot notation 5435 if json_path: 5436 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5437 5438 if json_path_expr: 5439 json_path_expr.set("escape", escape) 5440 5441 this = self.expression( 5442 exp.JSONExtract, 5443 this=this, 5444 expression=json_path_expr, 5445 variant_extract=True, 5446 ) 5447 5448 while casts: 5449 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5450 5451 return this 5452 5453 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5454 return self._parse_types() 5455 5456 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5457 this = self._parse_bracket(this) 5458 5459 while self._match_set(self.COLUMN_OPERATORS): 5460 op_token = self._prev.token_type 5461 op = self.COLUMN_OPERATORS.get(op_token) 5462 5463 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5464 field = self._parse_dcolon() 5465 if not field: 5466 self.raise_error("Expected type") 5467 elif op and self._curr: 5468 field = self._parse_column_reference() or self._parse_bracket() 5469 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5470 field = self._parse_column_ops(field) 5471 else: 5472 field = self._parse_field(any_token=True, anonymous_func=True) 5473 5474 if isinstance(field, (exp.Func, exp.Window)) and this: 5475 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5476 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5477 this = exp.replace_tree( 5478 this, 5479 lambda n: ( 5480 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5481 if n.table 5482 else n.this 5483 ) 5484 if isinstance(n, exp.Column) 5485 else n, 5486 ) 5487 5488 if op: 5489 this = op(self, this, field) 5490 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5491 this = self.expression( 5492 exp.Column, 5493 comments=this.comments, 5494 this=field, 5495 table=this.this, 5496 db=this.args.get("table"), 5497 catalog=this.args.get("db"), 5498 ) 5499 elif isinstance(field, exp.Window): 5500 # Move the exp.Dot's to the window's function 5501 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5502 field.set("this", window_func) 5503 this = field 5504 else: 5505 this = self.expression(exp.Dot, this=this, expression=field) 5506 5507 if field and field.comments: 5508 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5509 5510 this = self._parse_bracket(this) 5511 5512 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5513 5514 def _parse_primary(self) -> t.Optional[exp.Expression]: 5515 if self._match_set(self.PRIMARY_PARSERS): 5516 token_type = self._prev.token_type 5517 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5518 5519 if token_type == TokenType.STRING: 5520 expressions = [primary] 5521 while self._match(TokenType.STRING): 5522 expressions.append(exp.Literal.string(self._prev.text)) 5523 5524 if len(expressions) > 1: 5525 return self.expression(exp.Concat, expressions=expressions) 5526 5527 return primary 5528 5529 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5530 return exp.Literal.number(f"0.{self._prev.text}") 5531 5532 if self._match(TokenType.L_PAREN): 5533 comments = self._prev_comments 5534 query = self._parse_select() 5535 5536 if query: 5537 expressions = [query] 5538 else: 5539 expressions = self._parse_expressions() 5540 5541 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5542 5543 if not this and self._match(TokenType.R_PAREN, advance=False): 5544 this = self.expression(exp.Tuple) 5545 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5546 this = self._parse_subquery(this=this, parse_alias=False) 5547 elif isinstance(this, exp.Subquery): 5548 this = self._parse_subquery( 5549 this=self._parse_set_operations(this), parse_alias=False 5550 ) 5551 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5552 this = self.expression(exp.Tuple, expressions=expressions) 5553 else: 5554 this = self.expression(exp.Paren, this=this) 5555 5556 if this: 5557 this.add_comments(comments) 5558 5559 self._match_r_paren(expression=this) 5560 return this 5561 5562 return None 5563 5564 def _parse_field( 5565 self, 5566 any_token: bool = False, 5567 tokens: t.Optional[t.Collection[TokenType]] = None, 5568 anonymous_func: bool = False, 5569 ) -> t.Optional[exp.Expression]: 5570 if anonymous_func: 5571 field = ( 5572 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5573 or self._parse_primary() 5574 ) 5575 else: 5576 field = self._parse_primary() or self._parse_function( 5577 anonymous=anonymous_func, any_token=any_token 5578 ) 5579 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5580 5581 def _parse_function( 5582 self, 5583 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5584 anonymous: bool = False, 5585 optional_parens: 
bool = True, 5586 any_token: bool = False, 5587 ) -> t.Optional[exp.Expression]: 5588 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5589 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5590 fn_syntax = False 5591 if ( 5592 self._match(TokenType.L_BRACE, advance=False) 5593 and self._next 5594 and self._next.text.upper() == "FN" 5595 ): 5596 self._advance(2) 5597 fn_syntax = True 5598 5599 func = self._parse_function_call( 5600 functions=functions, 5601 anonymous=anonymous, 5602 optional_parens=optional_parens, 5603 any_token=any_token, 5604 ) 5605 5606 if fn_syntax: 5607 self._match(TokenType.R_BRACE) 5608 5609 return func 5610 5611 def _parse_function_call( 5612 self, 5613 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5614 anonymous: bool = False, 5615 optional_parens: bool = True, 5616 any_token: bool = False, 5617 ) -> t.Optional[exp.Expression]: 5618 if not self._curr: 5619 return None 5620 5621 comments = self._curr.comments 5622 token = self._curr 5623 token_type = self._curr.token_type 5624 this = self._curr.text 5625 upper = this.upper() 5626 5627 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5628 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5629 self._advance() 5630 return self._parse_window(parser(self)) 5631 5632 if not self._next or self._next.token_type != TokenType.L_PAREN: 5633 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5634 self._advance() 5635 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5636 5637 return None 5638 5639 if any_token: 5640 if token_type in self.RESERVED_TOKENS: 5641 return None 5642 elif token_type not in self.FUNC_TOKENS: 5643 return None 5644 5645 self._advance(2) 5646 5647 parser = self.FUNCTION_PARSERS.get(upper) 5648 if parser and not anonymous: 5649 this = parser(self) 5650 else: 5651 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5652 5653 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5654 this = self.expression( 5655 subquery_predicate, comments=comments, this=self._parse_select() 5656 ) 5657 self._match_r_paren() 5658 return this 5659 5660 if functions is None: 5661 functions = self.FUNCTIONS 5662 5663 function = functions.get(upper) 5664 known_function = function and not anonymous 5665 5666 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5667 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5668 5669 post_func_comments = self._curr and self._curr.comments 5670 if known_function and post_func_comments: 5671 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5672 # call we'll construct it as exp.Anonymous, even if it's "known" 5673 if any( 5674 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5675 for comment in post_func_comments 5676 ): 5677 known_function = False 5678 5679 if alias and known_function: 5680 args = self._kv_to_prop_eq(args) 5681 5682 if known_function: 5683 func_builder = t.cast(t.Callable, function) 5684 5685 if "dialect" in func_builder.__code__.co_varnames: 5686 func = func_builder(args, dialect=self.dialect) 5687 else: 5688 func = func_builder(args) 5689 5690 func = self.validate_expression(func, args) 5691 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5692 func.meta["name"] = this 5693 5694 this = func 5695 else: 5696 if token_type == TokenType.IDENTIFIER: 5697 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5698 this = 
self.expression(exp.Anonymous, this=this, expressions=args) 5699 5700 if isinstance(this, exp.Expression): 5701 this.add_comments(comments) 5702 5703 self._match_r_paren(this) 5704 return self._parse_window(this) 5705 5706 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5707 return expression 5708 5709 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5710 transformed = [] 5711 5712 for index, e in enumerate(expressions): 5713 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5714 if isinstance(e, exp.Alias): 5715 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5716 5717 if not isinstance(e, exp.PropertyEQ): 5718 e = self.expression( 5719 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5720 ) 5721 5722 if isinstance(e.this, exp.Column): 5723 e.this.replace(e.this.this) 5724 else: 5725 e = self._to_prop_eq(e, index) 5726 5727 transformed.append(e) 5728 5729 return transformed 5730 5731 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5732 return self._parse_statement() 5733 5734 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5735 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5736 5737 def _parse_user_defined_function( 5738 self, kind: t.Optional[TokenType] = None 5739 ) -> t.Optional[exp.Expression]: 5740 this = self._parse_table_parts(schema=True) 5741 5742 if not self._match(TokenType.L_PAREN): 5743 return this 5744 5745 expressions = self._parse_csv(self._parse_function_parameter) 5746 self._match_r_paren() 5747 return self.expression( 5748 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5749 ) 5750 5751 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5752 literal = self._parse_primary() 5753 if literal: 5754 return self.expression(exp.Introducer, this=token.text, expression=literal) 5755 5756 return self._identifier_expression(token) 5757 5758 def _parse_session_parameter(self) -> exp.SessionParameter: 5759 kind = None 5760 this = self._parse_id_var() or self._parse_primary() 5761 5762 if this and self._match(TokenType.DOT): 5763 kind = this.name 5764 this = self._parse_var() or self._parse_primary() 5765 5766 return self.expression(exp.SessionParameter, this=this, kind=kind) 5767 5768 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5769 return self._parse_id_var() 5770 5771 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5772 index = self._index 5773 5774 if self._match(TokenType.L_PAREN): 5775 expressions = t.cast( 5776 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5777 ) 5778 5779 if not self._match(TokenType.R_PAREN): 5780 self._retreat(index) 5781 else: 5782 expressions = [self._parse_lambda_arg()] 5783 5784 if self._match_set(self.LAMBDAS): 5785 return self.LAMBDAS[self._prev.token_type](self, expressions) 5786 5787 self._retreat(index) 5788 5789 this: t.Optional[exp.Expression] 5790 5791 if self._match(TokenType.DISTINCT): 5792 this = self.expression( 5793 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5794 ) 5795 else: 5796 this = self._parse_select_or_expression(alias=alias) 5797 5798 return self._parse_limit( 5799 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5800 ) 5801 5802 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5803 index = 
self._index 5804 if not self._match(TokenType.L_PAREN): 5805 return this 5806 5807 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5808 # expr can be of both types 5809 if self._match_set(self.SELECT_START_TOKENS): 5810 self._retreat(index) 5811 return this 5812 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5813 self._match_r_paren() 5814 return self.expression(exp.Schema, this=this, expressions=args) 5815 5816 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5817 return self._parse_column_def(self._parse_field(any_token=True)) 5818 5819 def _parse_column_def( 5820 self, this: t.Optional[exp.Expression], computed_column: bool = True 5821 ) -> t.Optional[exp.Expression]: 5822 # column defs are not really columns, they're identifiers 5823 if isinstance(this, exp.Column): 5824 this = this.this 5825 5826 if not computed_column: 5827 self._match(TokenType.ALIAS) 5828 5829 kind = self._parse_types(schema=True) 5830 5831 if self._match_text_seq("FOR", "ORDINALITY"): 5832 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5833 5834 constraints: t.List[exp.Expression] = [] 5835 5836 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5837 ("ALIAS", "MATERIALIZED") 5838 ): 5839 persisted = self._prev.text.upper() == "MATERIALIZED" 5840 constraint_kind = exp.ComputedColumnConstraint( 5841 this=self._parse_assignment(), 5842 persisted=persisted or self._match_text_seq("PERSISTED"), 5843 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5844 ) 5845 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5846 elif ( 5847 kind 5848 and self._match(TokenType.ALIAS, advance=False) 5849 and ( 5850 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5851 or (self._next and self._next.token_type == TokenType.L_PAREN) 5852 ) 5853 ): 5854 self._advance() 5855 constraints.append( 5856 self.expression( 5857 exp.ColumnConstraint, 5858 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5859 ) 5860 ) 5861 5862 while True: 5863 constraint = self._parse_column_constraint() 5864 if not constraint: 5865 break 5866 constraints.append(constraint) 5867 5868 if not kind and not constraints: 5869 return this 5870 5871 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5872 5873 def _parse_auto_increment( 5874 self, 5875 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5876 start = None 5877 increment = None 5878 5879 if self._match(TokenType.L_PAREN, advance=False): 5880 args = self._parse_wrapped_csv(self._parse_bitwise) 5881 start = seq_get(args, 0) 5882 increment = seq_get(args, 1) 5883 elif self._match_text_seq("START"): 5884 start = self._parse_bitwise() 5885 self._match_text_seq("INCREMENT") 5886 increment = self._parse_bitwise() 5887 5888 if start and increment: 5889 return exp.GeneratedAsIdentityColumnConstraint( 5890 start=start, increment=increment, this=False 5891 ) 5892 5893 return exp.AutoIncrementColumnConstraint() 5894 5895 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5896 if not self._match_text_seq("REFRESH"): 5897 self._retreat(self._index - 1) 5898 return None 5899 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5900 5901 def _parse_compress(self) -> exp.CompressColumnConstraint: 5902 if self._match(TokenType.L_PAREN, advance=False): 5903 return self.expression( 5904 exp.CompressColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_bitwise) 5905 ) 5906 5907 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5908 5909 def _parse_generated_as_identity( 5910 self, 5911 ) -> ( 5912 exp.GeneratedAsIdentityColumnConstraint 5913 | exp.ComputedColumnConstraint 5914 | exp.GeneratedAsRowColumnConstraint 5915 ): 5916 if self._match_text_seq("BY", "DEFAULT"): 5917 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5918 this = self.expression( 5919 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5920 ) 5921 else: 5922 self._match_text_seq("ALWAYS") 5923 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5924 5925 self._match(TokenType.ALIAS) 5926 5927 if self._match_text_seq("ROW"): 5928 start = self._match_text_seq("START") 5929 if not start: 5930 self._match(TokenType.END) 5931 hidden = self._match_text_seq("HIDDEN") 5932 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5933 5934 identity = self._match_text_seq("IDENTITY") 5935 5936 if self._match(TokenType.L_PAREN): 5937 if self._match(TokenType.START_WITH): 5938 this.set("start", self._parse_bitwise()) 5939 if self._match_text_seq("INCREMENT", "BY"): 5940 this.set("increment", self._parse_bitwise()) 5941 if self._match_text_seq("MINVALUE"): 5942 this.set("minvalue", self._parse_bitwise()) 5943 if self._match_text_seq("MAXVALUE"): 5944 this.set("maxvalue", self._parse_bitwise()) 5945 5946 if self._match_text_seq("CYCLE"): 5947 this.set("cycle", True) 5948 elif self._match_text_seq("NO", "CYCLE"): 5949 this.set("cycle", False) 5950 5951 if not identity: 5952 this.set("expression", self._parse_range()) 5953 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5954 args = self._parse_csv(self._parse_bitwise) 5955 this.set("start", seq_get(args, 0)) 5956 this.set("increment", seq_get(args, 1)) 5957 5958 self._match_r_paren() 5959 5960 return this 5961 5962 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5963 self._match_text_seq("LENGTH") 5964 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5965 5966 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5967 if self._match_text_seq("NULL"): 5968 return self.expression(exp.NotNullColumnConstraint) 5969 if self._match_text_seq("CASESPECIFIC"): 5970 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5971 if self._match_text_seq("FOR", "REPLICATION"): 5972 return self.expression(exp.NotForReplicationColumnConstraint) 5973 5974 # Unconsume the `NOT` token 5975 self._retreat(self._index - 1) 5976 return None 5977 5978 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5979 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5980 5981 procedure_option_follows = ( 5982 self._match(TokenType.WITH, advance=False) 5983 and self._next 5984 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5985 ) 5986 5987 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5988 return self.expression( 5989 exp.ColumnConstraint, 5990 this=this, 5991 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5992 ) 5993 5994 return this 5995 5996 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5997 if not self._match(TokenType.CONSTRAINT): 5998 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5999 6000 return self.expression( 6001 exp.Constraint, 6002 this=self._parse_id_var(), 6003 
expressions=self._parse_unnamed_constraints(), 6004 ) 6005 6006 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6007 constraints = [] 6008 while True: 6009 constraint = self._parse_unnamed_constraint() or self._parse_function() 6010 if not constraint: 6011 break 6012 constraints.append(constraint) 6013 6014 return constraints 6015 6016 def _parse_unnamed_constraint( 6017 self, constraints: t.Optional[t.Collection[str]] = None 6018 ) -> t.Optional[exp.Expression]: 6019 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6020 constraints or self.CONSTRAINT_PARSERS 6021 ): 6022 return None 6023 6024 constraint = self._prev.text.upper() 6025 if constraint not in self.CONSTRAINT_PARSERS: 6026 self.raise_error(f"No parser found for schema constraint {constraint}.") 6027 6028 return self.CONSTRAINT_PARSERS[constraint](self) 6029 6030 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6031 return self._parse_id_var(any_token=False) 6032 6033 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6034 self._match_text_seq("KEY") 6035 return self.expression( 6036 exp.UniqueColumnConstraint, 6037 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6038 this=self._parse_schema(self._parse_unique_key()), 6039 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6040 on_conflict=self._parse_on_conflict(), 6041 options=self._parse_key_constraint_options(), 6042 ) 6043 6044 def _parse_key_constraint_options(self) -> t.List[str]: 6045 options = [] 6046 while True: 6047 if not self._curr: 6048 break 6049 6050 if self._match(TokenType.ON): 6051 action = None 6052 on = self._advance_any() and self._prev.text 6053 6054 if self._match_text_seq("NO", "ACTION"): 6055 action = "NO ACTION" 6056 elif self._match_text_seq("CASCADE"): 6057 action = "CASCADE" 6058 elif self._match_text_seq("RESTRICT"): 6059 action = "RESTRICT" 6060 elif self._match_pair(TokenType.SET, TokenType.NULL): 6061 action = "SET NULL" 6062 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6063 action = "SET DEFAULT" 6064 else: 6065 self.raise_error("Invalid key constraint") 6066 6067 options.append(f"ON {on} {action}") 6068 else: 6069 var = self._parse_var_from_options( 6070 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6071 ) 6072 if not var: 6073 break 6074 options.append(var.name) 6075 6076 return options 6077 6078 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6079 if match and not self._match(TokenType.REFERENCES): 6080 return None 6081 6082 expressions = None 6083 this = self._parse_table(schema=True) 6084 options = self._parse_key_constraint_options() 6085 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6086 6087 def _parse_foreign_key(self) -> exp.ForeignKey: 6088 expressions = self._parse_wrapped_id_vars() 6089 reference = self._parse_references() 6090 on_options = {} 6091 6092 while self._match(TokenType.ON): 6093 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6094 self.raise_error("Expected DELETE or UPDATE") 6095 6096 kind = self._prev.text.lower() 6097 6098 if self._match_text_seq("NO", "ACTION"): 6099 action = "NO ACTION" 6100 elif self._match(TokenType.SET): 6101 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6102 action = "SET " + self._prev.text.upper() 6103 else: 6104 self._advance() 6105 action = self._prev.text.upper() 6106 6107 on_options[kind] = action 6108 6109 return self.expression( 6110 exp.ForeignKey, 6111 
expressions=expressions,
6112 reference=reference,
6113 options=self._parse_key_constraint_options(),
6114 **on_options, # type: ignore
6115 )
6116
6117 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
6118 return self._parse_ordered() or self._parse_field()
6119
6120 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
6121 if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
6122 self._retreat(self._index - 1)
6123 return None
6124
6125 id_vars = self._parse_wrapped_id_vars()
6126 return self.expression(
6127 exp.PeriodForSystemTimeConstraint,
6128 this=seq_get(id_vars, 0),
6129 expression=seq_get(id_vars, 1),
6130 )
6131
6132 def _parse_primary_key(
6133 self, wrapped_optional: bool = False, in_props: bool = False
6134 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
6135 desc = (
6136 self._match_set((TokenType.ASC, TokenType.DESC))
6137 and self._prev.token_type == TokenType.DESC
6138 )
6139
6140 if not in_props and not self._match(TokenType.L_PAREN, advance=False):
6141 return self.expression(
6142 exp.PrimaryKeyColumnConstraint,
6143 desc=desc,
6144 options=self._parse_key_constraint_options(),
6145 )
6146
6147 expressions = self._parse_wrapped_csv(
6148 self._parse_primary_key_part, optional=wrapped_optional
6149 )
6150 options = self._parse_key_constraint_options()
6151 return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
6152
6153 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
6154 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
6155
6156 def _parse_odbc_datetime_literal(self) -> exp.Expression:
6157 """
6158 Parses a datetime literal in ODBC format. We parse the literal into the corresponding
6159 expression type; for example, `{d'yyyy-mm-dd'}` is parsed into a `Date` node, exactly
6160 as `DATE('yyyy-mm-dd')` would be.
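Example (a sketch; shown for the default dialect, which should render the
ODBC literal back as the equivalent DATE(...) call):

    >>> import sqlglot
    >>> sqlglot.parse_one("SELECT {d '2024-01-01'}").sql()
    "SELECT DATE('2024-01-01')"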
6161 6162 Reference: 6163 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6164 """ 6165 self._match(TokenType.VAR) 6166 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6167 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6168 if not self._match(TokenType.R_BRACE): 6169 self.raise_error("Expected }") 6170 return expression 6171 6172 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6173 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6174 return this 6175 6176 bracket_kind = self._prev.token_type 6177 if ( 6178 bracket_kind == TokenType.L_BRACE 6179 and self._curr 6180 and self._curr.token_type == TokenType.VAR 6181 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6182 ): 6183 return self._parse_odbc_datetime_literal() 6184 6185 expressions = self._parse_csv( 6186 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6187 ) 6188 6189 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6190 self.raise_error("Expected ]") 6191 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6192 self.raise_error("Expected }") 6193 6194 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6195 if bracket_kind == TokenType.L_BRACE: 6196 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6197 elif not this: 6198 this = build_array_constructor( 6199 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6200 ) 6201 else: 6202 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6203 if constructor_type: 6204 return build_array_constructor( 6205 constructor_type, 6206 args=expressions, 6207 bracket_kind=bracket_kind, 6208 dialect=self.dialect, 6209 ) 6210 6211 expressions = apply_index_offset( 6212 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6213 ) 6214 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6215 6216 self._add_comments(this) 6217 return self._parse_bracket(this) 6218 6219 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6220 if self._match(TokenType.COLON): 6221 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6222 return this 6223 6224 def _parse_case(self) -> t.Optional[exp.Expression]: 6225 ifs = [] 6226 default = None 6227 6228 comments = self._prev_comments 6229 expression = self._parse_assignment() 6230 6231 while self._match(TokenType.WHEN): 6232 this = self._parse_assignment() 6233 self._match(TokenType.THEN) 6234 then = self._parse_assignment() 6235 ifs.append(self.expression(exp.If, this=this, true=then)) 6236 6237 if self._match(TokenType.ELSE): 6238 default = self._parse_assignment() 6239 6240 if not self._match(TokenType.END): 6241 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6242 default = exp.column("interval") 6243 else: 6244 self.raise_error("Expected END after CASE", self._prev) 6245 6246 return self.expression( 6247 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6248 ) 6249 6250 def _parse_if(self) -> t.Optional[exp.Expression]: 6251 if self._match(TokenType.L_PAREN): 6252 args = self._parse_csv( 6253 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6254 ) 6255 this = self.validate_expression(exp.If.from_arg_list(args), args) 6256 self._match_r_paren() 6257 
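#
# The branch below handles the keyword form IF <cond> THEN <expr> [ELSE <expr>] END.
# For the parenthesized form above, the default dialect should render the result back
# as a CASE expression, roughly (a sketch):
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT IF(x > 0, 1, 2)").sql()
#   'SELECT CASE WHEN x > 0 THEN 1 ELSE 2 END'
#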
else: 6258 index = self._index - 1 6259 6260 if self.NO_PAREN_IF_COMMANDS and index == 0: 6261 return self._parse_as_command(self._prev) 6262 6263 condition = self._parse_assignment() 6264 6265 if not condition: 6266 self._retreat(index) 6267 return None 6268 6269 self._match(TokenType.THEN) 6270 true = self._parse_assignment() 6271 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6272 self._match(TokenType.END) 6273 this = self.expression(exp.If, this=condition, true=true, false=false) 6274 6275 return this 6276 6277 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6278 if not self._match_text_seq("VALUE", "FOR"): 6279 self._retreat(self._index - 1) 6280 return None 6281 6282 return self.expression( 6283 exp.NextValueFor, 6284 this=self._parse_column(), 6285 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6286 ) 6287 6288 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6289 this = self._parse_function() or self._parse_var_or_string(upper=True) 6290 6291 if self._match(TokenType.FROM): 6292 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6293 6294 if not self._match(TokenType.COMMA): 6295 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6296 6297 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6298 6299 def _parse_gap_fill(self) -> exp.GapFill: 6300 self._match(TokenType.TABLE) 6301 this = self._parse_table() 6302 6303 self._match(TokenType.COMMA) 6304 args = [this, *self._parse_csv(self._parse_lambda)] 6305 6306 gap_fill = exp.GapFill.from_arg_list(args) 6307 return self.validate_expression(gap_fill, args) 6308 6309 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6310 this = self._parse_assignment() 6311 6312 if not self._match(TokenType.ALIAS): 6313 if self._match(TokenType.COMMA): 6314 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6315 6316 self.raise_error("Expected AS after CAST") 6317 6318 fmt = None 6319 to = self._parse_types() 6320 6321 default = self._match(TokenType.DEFAULT) 6322 if default: 6323 default = self._parse_bitwise() 6324 self._match_text_seq("ON", "CONVERSION", "ERROR") 6325 6326 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6327 fmt_string = self._parse_string() 6328 fmt = self._parse_at_time_zone(fmt_string) 6329 6330 if not to: 6331 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6332 if to.this in exp.DataType.TEMPORAL_TYPES: 6333 this = self.expression( 6334 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6335 this=this, 6336 format=exp.Literal.string( 6337 format_time( 6338 fmt_string.this if fmt_string else "", 6339 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6340 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6341 ) 6342 ), 6343 safe=safe, 6344 ) 6345 6346 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6347 this.set("zone", fmt.args["zone"]) 6348 return this 6349 elif not to: 6350 self.raise_error("Expected TYPE after CAST") 6351 elif isinstance(to, exp.Identifier): 6352 to = exp.DataType.build(to.name, udt=True) 6353 elif to.this == exp.DataType.Type.CHAR: 6354 if self._match(TokenType.CHARACTER_SET): 6355 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6356 6357 return self.expression( 6358 exp.Cast if strict else exp.TryCast, 6359 this=this, 6360 to=to, 6361 format=fmt, 6362 safe=safe, 6363 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6364 default=default, 6365 ) 6366 6367 def _parse_string_agg(self) -> exp.GroupConcat: 6368 if self._match(TokenType.DISTINCT): 6369 args: t.List[t.Optional[exp.Expression]] = [ 6370 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6371 ] 6372 if self._match(TokenType.COMMA): 6373 args.extend(self._parse_csv(self._parse_assignment)) 6374 else: 6375 args = self._parse_csv(self._parse_assignment) # type: ignore 6376 6377 if self._match_text_seq("ON", "OVERFLOW"): 6378 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6379 if self._match_text_seq("ERROR"): 6380 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6381 else: 6382 self._match_text_seq("TRUNCATE") 6383 on_overflow = self.expression( 6384 exp.OverflowTruncateBehavior, 6385 this=self._parse_string(), 6386 with_count=( 6387 self._match_text_seq("WITH", "COUNT") 6388 or not self._match_text_seq("WITHOUT", "COUNT") 6389 ), 6390 ) 6391 else: 6392 on_overflow = None 6393 6394 index = self._index 6395 if not self._match(TokenType.R_PAREN) and args: 6396 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6397 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6398 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6399 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6400 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6401 6402 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6403 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6404 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
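#
# For example (a sketch; the exact output may vary across versions), Postgres'
# STRING_AGG with an inline ORDER BY should transpile to MySQL's GROUP_CONCAT:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile(
#   ...     "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t", read="postgres", write="mysql"
#   ... )[0]
#   "SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t"
#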
6405 if not self._match_text_seq("WITHIN", "GROUP"): 6406 self._retreat(index) 6407 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6408 6409 # The corresponding match_r_paren will be called in parse_function (caller) 6410 self._match_l_paren() 6411 6412 return self.expression( 6413 exp.GroupConcat, 6414 this=self._parse_order(this=seq_get(args, 0)), 6415 separator=seq_get(args, 1), 6416 on_overflow=on_overflow, 6417 ) 6418 6419 def _parse_convert( 6420 self, strict: bool, safe: t.Optional[bool] = None 6421 ) -> t.Optional[exp.Expression]: 6422 this = self._parse_bitwise() 6423 6424 if self._match(TokenType.USING): 6425 to: t.Optional[exp.Expression] = self.expression( 6426 exp.CharacterSet, this=self._parse_var() 6427 ) 6428 elif self._match(TokenType.COMMA): 6429 to = self._parse_types() 6430 else: 6431 to = None 6432 6433 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6434 6435 def _parse_xml_table(self) -> exp.XMLTable: 6436 namespaces = None 6437 passing = None 6438 columns = None 6439 6440 if self._match_text_seq("XMLNAMESPACES", "("): 6441 namespaces = self._parse_xml_namespace() 6442 self._match_text_seq(")", ",") 6443 6444 this = self._parse_string() 6445 6446 if self._match_text_seq("PASSING"): 6447 # The BY VALUE keywords are optional and are provided for semantic clarity 6448 self._match_text_seq("BY", "VALUE") 6449 passing = self._parse_csv(self._parse_column) 6450 6451 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6452 6453 if self._match_text_seq("COLUMNS"): 6454 columns = self._parse_csv(self._parse_field_def) 6455 6456 return self.expression( 6457 exp.XMLTable, 6458 this=this, 6459 namespaces=namespaces, 6460 passing=passing, 6461 columns=columns, 6462 by_ref=by_ref, 6463 ) 6464 6465 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6466 namespaces = [] 6467 6468 while True: 6469 if self._match(TokenType.DEFAULT): 6470 uri = self._parse_string() 6471 else: 6472 uri = self._parse_alias(self._parse_string()) 6473 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6474 if not self._match(TokenType.COMMA): 6475 break 6476 6477 return namespaces 6478 6479 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6480 """ 6481 There are generally two variants of the DECODE function: 6482 6483 - DECODE(bin, charset) 6484 - DECODE(expression, search, result [, search, result] ... [, default]) 6485 6486 The second variant will always be parsed into a CASE expression. Note that NULL 6487 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6488 instead of relying on pattern matching. 
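For example (a sketch, reading Oracle with the default output dialect):

    >>> import sqlglot
    >>> sqlglot.transpile("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle")[0]
    "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t"
    >>> sqlglot.transpile("SELECT DECODE(x, NULL, 'none', 'some') FROM t", read="oracle")[0]
    "SELECT CASE WHEN x IS NULL THEN 'none' ELSE 'some' END FROM t"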
6489 """ 6490 args = self._parse_csv(self._parse_assignment) 6491 6492 if len(args) < 3: 6493 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6494 6495 expression, *expressions = args 6496 if not expression: 6497 return None 6498 6499 ifs = [] 6500 for search, result in zip(expressions[::2], expressions[1::2]): 6501 if not search or not result: 6502 return None 6503 6504 if isinstance(search, exp.Literal): 6505 ifs.append( 6506 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6507 ) 6508 elif isinstance(search, exp.Null): 6509 ifs.append( 6510 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6511 ) 6512 else: 6513 cond = exp.or_( 6514 exp.EQ(this=expression.copy(), expression=search), 6515 exp.and_( 6516 exp.Is(this=expression.copy(), expression=exp.Null()), 6517 exp.Is(this=search.copy(), expression=exp.Null()), 6518 copy=False, 6519 ), 6520 copy=False, 6521 ) 6522 ifs.append(exp.If(this=cond, true=result)) 6523 6524 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6525 6526 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6527 self._match_text_seq("KEY") 6528 key = self._parse_column() 6529 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6530 self._match_text_seq("VALUE") 6531 value = self._parse_bitwise() 6532 6533 if not key and not value: 6534 return None 6535 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6536 6537 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6538 if not this or not self._match_text_seq("FORMAT", "JSON"): 6539 return this 6540 6541 return self.expression(exp.FormatJson, this=this) 6542 6543 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6544 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6545 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6546 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6547 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6548 else: 6549 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6550 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6551 6552 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6553 6554 if not empty and not error and not null: 6555 return None 6556 6557 return self.expression( 6558 exp.OnCondition, 6559 empty=empty, 6560 error=error, 6561 null=null, 6562 ) 6563 6564 def _parse_on_handling( 6565 self, on: str, *values: str 6566 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6567 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6568 for value in values: 6569 if self._match_text_seq(value, "ON", on): 6570 return f"{value} ON {on}" 6571 6572 index = self._index 6573 if self._match(TokenType.DEFAULT): 6574 default_value = self._parse_bitwise() 6575 if self._match_text_seq("ON", on): 6576 return default_value 6577 6578 self._retreat(index) 6579 6580 return None 6581 6582 @t.overload 6583 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6584 6585 @t.overload 6586 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6587 6588 def _parse_json_object(self, agg=False): 6589 star = self._parse_star() 6590 expressions = ( 6591 [star] 6592 if star 6593 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6594 ) 6595 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6596 6597 unique_keys = None 6598 if self._match_text_seq("WITH", "UNIQUE"): 6599 unique_keys = True 6600 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6601 unique_keys = False 6602 6603 self._match_text_seq("KEYS") 6604 6605 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6606 self._parse_type() 6607 ) 6608 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6609 6610 return self.expression( 6611 exp.JSONObjectAgg if agg else exp.JSONObject, 6612 expressions=expressions, 6613 null_handling=null_handling, 6614 unique_keys=unique_keys, 6615 return_type=return_type, 6616 encoding=encoding, 6617 ) 6618 6619 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6620 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6621 if not self._match_text_seq("NESTED"): 6622 this = self._parse_id_var() 6623 kind = self._parse_types(allow_identifiers=False) 6624 nested = None 6625 else: 6626 this = None 6627 kind = None 6628 nested = True 6629 6630 path = self._match_text_seq("PATH") and self._parse_string() 6631 nested_schema = nested and self._parse_json_schema() 6632 6633 return self.expression( 6634 exp.JSONColumnDef, 6635 this=this, 6636 kind=kind, 6637 path=path, 6638 nested_schema=nested_schema, 6639 ) 6640 6641 def _parse_json_schema(self) -> exp.JSONSchema: 6642 self._match_text_seq("COLUMNS") 6643 return self.expression( 6644 exp.JSONSchema, 6645 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6646 ) 6647 6648 def _parse_json_table(self) -> exp.JSONTable: 6649 this = self._parse_format_json(self._parse_bitwise()) 6650 path = self._match(TokenType.COMMA) and self._parse_string() 6651 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6652 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6653 schema = self._parse_json_schema() 6654 6655 return exp.JSONTable( 6656 this=this, 6657 schema=schema, 6658 path=path, 6659 error_handling=error_handling, 6660 empty_handling=empty_handling, 6661 ) 6662 6663 def _parse_match_against(self) -> exp.MatchAgainst: 6664 expressions = self._parse_csv(self._parse_column) 6665 6666 self._match_text_seq(")", "AGAINST", "(") 6667 6668 this = self._parse_string() 6669 6670 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6671 modifier = "IN NATURAL LANGUAGE MODE" 6672 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6673 modifier = f"{modifier} WITH QUERY EXPANSION" 6674 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6675 modifier = "IN BOOLEAN MODE" 6676 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6677 modifier = "WITH QUERY EXPANSION" 6678 else: 6679 modifier = None 6680 6681 return self.expression( 6682 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6683 ) 6684 6685 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6686 def _parse_open_json(self) -> exp.OpenJSON: 6687 this = self._parse_bitwise() 6688 path = self._match(TokenType.COMMA) and self._parse_string() 6689 6690 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6691 this = self._parse_field(any_token=True) 6692 kind = self._parse_types() 6693 path = 
self._parse_string() 6694 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6695 6696 return self.expression( 6697 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6698 ) 6699 6700 expressions = None 6701 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6702 self._match_l_paren() 6703 expressions = self._parse_csv(_parse_open_json_column_def) 6704 6705 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6706 6707 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6708 args = self._parse_csv(self._parse_bitwise) 6709 6710 if self._match(TokenType.IN): 6711 return self.expression( 6712 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6713 ) 6714 6715 if haystack_first: 6716 haystack = seq_get(args, 0) 6717 needle = seq_get(args, 1) 6718 else: 6719 haystack = seq_get(args, 1) 6720 needle = seq_get(args, 0) 6721 6722 return self.expression( 6723 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6724 ) 6725 6726 def _parse_predict(self) -> exp.Predict: 6727 self._match_text_seq("MODEL") 6728 this = self._parse_table() 6729 6730 self._match(TokenType.COMMA) 6731 self._match_text_seq("TABLE") 6732 6733 return self.expression( 6734 exp.Predict, 6735 this=this, 6736 expression=self._parse_table(), 6737 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6738 ) 6739 6740 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6741 args = self._parse_csv(self._parse_table) 6742 return exp.JoinHint(this=func_name.upper(), expressions=args) 6743 6744 def _parse_substring(self) -> exp.Substring: 6745 # Postgres supports the form: substring(string [from int] [for int]) 6746 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6747 6748 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6749 6750 if self._match(TokenType.FROM): 6751 args.append(self._parse_bitwise()) 6752 if self._match(TokenType.FOR): 6753 if len(args) == 1: 6754 args.append(exp.Literal.number(1)) 6755 args.append(self._parse_bitwise()) 6756 6757 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6758 6759 def _parse_trim(self) -> exp.Trim: 6760 # https://www.w3resource.com/sql/character-functions/trim.php 6761 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6762 6763 position = None 6764 collation = None 6765 expression = None 6766 6767 if self._match_texts(self.TRIM_TYPES): 6768 position = self._prev.text.upper() 6769 6770 this = self._parse_bitwise() 6771 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6772 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6773 expression = self._parse_bitwise() 6774 6775 if invert_order: 6776 this, expression = expression, this 6777 6778 if self._match(TokenType.COLLATE): 6779 collation = self._parse_bitwise() 6780 6781 return self.expression( 6782 exp.Trim, this=this, position=position, expression=expression, collation=collation 6783 ) 6784 6785 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6786 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6787 6788 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6789 return self._parse_window(self._parse_id_var(), alias=True) 6790 6791 def _parse_respect_or_ignore_nulls( 6792 self, this: t.Optional[exp.Expression] 6793 ) -> t.Optional[exp.Expression]: 6794 if self._match_text_seq("IGNORE", "NULLS"): 
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
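    # Illustrative sketch (not part of the original source): per the comments in
    # _parse_window below, both accepted placements of the NULLS modifier should
    # normalize to the same shape, an exp.IgnoreNulls wrapping the aggregate:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y)")
    #   sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y)")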
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
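    # Illustrative sketch (not part of the original source): a full window
    # specification exercises _parse_window, _parse_partition_and_order and
    # _parse_window_spec above together:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "SELECT SUM(x) OVER "
    #       "(PARTITION BY y ORDER BY z ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)"
    #   )
    #
    # The frame clause should land in an exp.WindowSpec with kind="ROWS".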
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)
    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
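    # Illustrative note (not part of the original source): _parse_csv,
    # _parse_wrapped and _parse_wrapped_csv above are the core combinators used
    # throughout this module. For instance, a wrapped identifier list such as
    # "(a, b, c)" is typically consumed with
    #
    #   ids = self._parse_wrapped_csv(self._parse_id_var)
    #
    # where optional=True would make the surrounding parentheses optional.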
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
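    # Illustrative sketch (not part of the original source): an ADD COLUMN with
    # a position clause exercises _parse_add_column above, e.g. in MySQL:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT AFTER b", read="mysql")
    #
    # which should attach an exp.ColumnPosition to the new column definition.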
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
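    # Illustrative sketch (not part of the original source): the SET DATA TYPE
    # fallthrough at the end of _parse_alter_table_alter above handles e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c SET DATA TYPE TEXT")
    #
    # producing an exp.AlterColumn whose dtype arg is the parsed TEXT type.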
    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)
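    # Illustrative note (not part of the original source): _parse_alter only
    # commits to an exp.Alter when every token was consumed by a known action
    # parser; otherwise it falls back to _parse_as_command, which returns the
    # whole statement verbatim as an exp.Command so unsupported ALTER variants
    # round-trip unchanged.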
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None
    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
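    # Illustrative sketch (not part of the original source): a typical MERGE
    # statement exercises _parse_merge above and _parse_when_matched below:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "MERGE INTO t USING s ON t.id = s.id "
    #       "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #       "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   )
    #
    # The WHEN branches should end up as exp.When nodes inside an exp.Whens.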
    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)
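    # Illustrative sketch (not part of the original source): an OPTIONS_TYPE
    # table maps a leading keyword to its allowed continuations, so a mapping
    # such as
    #
    #   {"GRANT": ("OPTION",), "CASCADE": ()}  # hypothetical example
    #
    # lets _parse_var_from_options accept "GRANT OPTION" and bare "CASCADE",
    # while raising (or retreating, when raise_unmatched=False) on anything else.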
    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
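    # Illustrative note (not part of the original source): _find_parser walks a
    # keyword trie (built with new_trie) so that multi-word commands such as
    # "SHOW GLOBAL STATUS" can be matched token by token against the keys of
    # SHOW_PARSERS/SET_PARSERS, retreating cleanly when no entry matches.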
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
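    # Illustrative note (not part of the original source): the _match* helpers
    # above are all-or-nothing. _match_text_seq("WITH", "GRANT", "OPTION")
    # either consumes all three tokens and returns True, or retreats to the
    # starting index and returns None, so callers can probe freely without
    # corrupting the token cursor (advance=False also probes without consuming).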
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
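    # Illustrative sketch (not part of the original source): a Snowflake-style
    # COPY INTO statement runs through _parse_copy below together with
    # _parse_copy_parameters and _parse_credentials above:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "COPY INTO t FROM @stage/file.csv FILE_FORMAT = (TYPE = CSV)",
    #       read="snowflake",
    #   )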
    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)
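    # Illustrative sketch (not part of the original source): _parse_star_ops
    # above backs the BigQuery/DuckDB-style star modifiers, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT * EXCEPT (a) REPLACE (b AS c) FROM t", read="bigquery")
    #
    # which should set the "except" and "replace" args on the exp.Star node.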
    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
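# Illustrative sketch (not part of the original source): the parenthesization
# above preserves precedence when MOD is rewritten to the % operator, e.g.
#
#   import sqlglot
#   sqlglot.transpile("SELECT MOD(a + 1, 7)")[0]  # should yield "SELECT (a + 1) % 7"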
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
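# Illustrative note (not part of the original source): the two-argument form
# CONVERT_TIMEZONE(target_tz, ts) is normalized above by injecting the caller's
# default source timezone (when one is provided), while the three-argument form
# CONVERT_TIMEZONE(source_tz, target_tz, ts) is built directly via from_arg_list.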
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT, TokenType.BOOLEAN, TokenType.TINYINT, TokenType.UTINYINT,
        TokenType.SMALLINT, TokenType.USMALLINT, TokenType.INT, TokenType.UINT,
        TokenType.BIGINT, TokenType.UBIGINT, TokenType.INT128, TokenType.UINT128,
        TokenType.INT256, TokenType.UINT256, TokenType.MEDIUMINT, TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING, TokenType.FLOAT, TokenType.DOUBLE, TokenType.UDOUBLE,
        TokenType.CHAR, TokenType.NCHAR, TokenType.VARCHAR, TokenType.NVARCHAR,
        TokenType.BPCHAR, TokenType.TEXT, TokenType.MEDIUMTEXT, TokenType.LONGTEXT,
        TokenType.BLOB, TokenType.MEDIUMBLOB, TokenType.LONGBLOB, TokenType.BINARY,
        TokenType.VARBINARY, TokenType.JSON, TokenType.JSONB, TokenType.INTERVAL,
        TokenType.TINYBLOB, TokenType.TINYTEXT, TokenType.TIME, TokenType.TIMETZ,
        TokenType.TIMESTAMP, TokenType.TIMESTAMP_S, TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS, TokenType.TIMESTAMPTZ, TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ, TokenType.DATETIME, TokenType.DATETIME2,
        TokenType.DATETIME64, TokenType.SMALLDATETIME, TokenType.DATE, TokenType.DATE32,
        TokenType.INT4RANGE, TokenType.INT4MULTIRANGE, TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE, TokenType.NUMRANGE, TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE, TokenType.TSMULTIRANGE, TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE, TokenType.DATERANGE, TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL, TokenType.DECIMAL32, TokenType.DECIMAL64, TokenType.DECIMAL128,
        TokenType.DECIMAL256, TokenType.UDECIMAL, TokenType.BIGDECIMAL, TokenType.UUID,
        TokenType.GEOGRAPHY, TokenType.GEOMETRY, TokenType.POINT, TokenType.RING,
        TokenType.LINESTRING, TokenType.MULTILINESTRING, TokenType.POLYGON,
        TokenType.MULTIPOLYGON, TokenType.HLLSKETCH, TokenType.HSTORE,
        TokenType.PSEUDO_TYPE, TokenType.SUPER, TokenType.SERIAL, TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL, TokenType.XML, TokenType.YEAR, TokenType.USERDEFINED,
        TokenType.MONEY, TokenType.SMALLMONEY, TokenType.ROWVERSION, TokenType.IMAGE,
        TokenType.VARIANT, TokenType.VECTOR,
TokenType.VOID, 402 TokenType.OBJECT, 403 TokenType.OBJECT_IDENTIFIER, 404 TokenType.INET, 405 TokenType.IPADDRESS, 406 TokenType.IPPREFIX, 407 TokenType.IPV4, 408 TokenType.IPV6, 409 TokenType.UNKNOWN, 410 TokenType.NOTHING, 411 TokenType.NULL, 412 TokenType.NAME, 413 TokenType.TDIGEST, 414 TokenType.DYNAMIC, 415 *ENUM_TYPE_TOKENS, 416 *NESTED_TYPE_TOKENS, 417 *AGGREGATE_TYPE_TOKENS, 418 } 419 420 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 421 TokenType.BIGINT: TokenType.UBIGINT, 422 TokenType.INT: TokenType.UINT, 423 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 424 TokenType.SMALLINT: TokenType.USMALLINT, 425 TokenType.TINYINT: TokenType.UTINYINT, 426 TokenType.DECIMAL: TokenType.UDECIMAL, 427 TokenType.DOUBLE: TokenType.UDOUBLE, 428 } 429 430 SUBQUERY_PREDICATES = { 431 TokenType.ANY: exp.Any, 432 TokenType.ALL: exp.All, 433 TokenType.EXISTS: exp.Exists, 434 TokenType.SOME: exp.Any, 435 } 436 437 RESERVED_TOKENS = { 438 *Tokenizer.SINGLE_TOKENS.values(), 439 TokenType.SELECT, 440 } - {TokenType.IDENTIFIER} 441 442 DB_CREATABLES = { 443 TokenType.DATABASE, 444 TokenType.DICTIONARY, 445 TokenType.FILE_FORMAT, 446 TokenType.MODEL, 447 TokenType.NAMESPACE, 448 TokenType.SCHEMA, 449 TokenType.SEQUENCE, 450 TokenType.SINK, 451 TokenType.SOURCE, 452 TokenType.STAGE, 453 TokenType.STORAGE_INTEGRATION, 454 TokenType.STREAMLIT, 455 TokenType.TABLE, 456 TokenType.TAG, 457 TokenType.VIEW, 458 TokenType.WAREHOUSE, 459 } 460 461 CREATABLES = { 462 TokenType.COLUMN, 463 TokenType.CONSTRAINT, 464 TokenType.FOREIGN_KEY, 465 TokenType.FUNCTION, 466 TokenType.INDEX, 467 TokenType.PROCEDURE, 468 *DB_CREATABLES, 469 } 470 471 ALTERABLES = { 472 TokenType.INDEX, 473 TokenType.TABLE, 474 TokenType.VIEW, 475 } 476 477 # Tokens that can represent identifiers 478 ID_VAR_TOKENS = { 479 TokenType.ALL, 480 TokenType.ATTACH, 481 TokenType.VAR, 482 TokenType.ANTI, 483 TokenType.APPLY, 484 TokenType.ASC, 485 TokenType.ASOF, 486 TokenType.AUTO_INCREMENT, 487 TokenType.BEGIN, 488 TokenType.BPCHAR, 489 TokenType.CACHE, 490 TokenType.CASE, 491 TokenType.COLLATE, 492 TokenType.COMMAND, 493 TokenType.COMMENT, 494 TokenType.COMMIT, 495 TokenType.CONSTRAINT, 496 TokenType.COPY, 497 TokenType.CUBE, 498 TokenType.CURRENT_SCHEMA, 499 TokenType.DEFAULT, 500 TokenType.DELETE, 501 TokenType.DESC, 502 TokenType.DESCRIBE, 503 TokenType.DETACH, 504 TokenType.DICTIONARY, 505 TokenType.DIV, 506 TokenType.END, 507 TokenType.EXECUTE, 508 TokenType.EXPORT, 509 TokenType.ESCAPE, 510 TokenType.FALSE, 511 TokenType.FIRST, 512 TokenType.FILTER, 513 TokenType.FINAL, 514 TokenType.FORMAT, 515 TokenType.FULL, 516 TokenType.GET, 517 TokenType.IDENTIFIER, 518 TokenType.IS, 519 TokenType.ISNULL, 520 TokenType.INTERVAL, 521 TokenType.KEEP, 522 TokenType.KILL, 523 TokenType.LEFT, 524 TokenType.LIMIT, 525 TokenType.LOAD, 526 TokenType.MERGE, 527 TokenType.NATURAL, 528 TokenType.NEXT, 529 TokenType.OFFSET, 530 TokenType.OPERATOR, 531 TokenType.ORDINALITY, 532 TokenType.OVERLAPS, 533 TokenType.OVERWRITE, 534 TokenType.PARTITION, 535 TokenType.PERCENT, 536 TokenType.PIVOT, 537 TokenType.PRAGMA, 538 TokenType.PUT, 539 TokenType.RANGE, 540 TokenType.RECURSIVE, 541 TokenType.REFERENCES, 542 TokenType.REFRESH, 543 TokenType.RENAME, 544 TokenType.REPLACE, 545 TokenType.RIGHT, 546 TokenType.ROLLUP, 547 TokenType.ROW, 548 TokenType.ROWS, 549 TokenType.SEMI, 550 TokenType.SET, 551 TokenType.SETTINGS, 552 TokenType.SHOW, 553 TokenType.TEMPORARY, 554 TokenType.TOP, 555 TokenType.TRUE, 556 TokenType.TRUNCATE, 557 TokenType.UNIQUE, 558 TokenType.UNNEST, 559 TokenType.UNPIVOT, 
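
    # The token sets above compose with ordinary set algebra: a broad set such
    # as ID_VAR_TOKENS is narrowed per parsing context, exactly like
    # TABLE_ALIAS_TOKENS above. A minimal sketch of the pattern, with
    # hypothetical names:
    #
    #   AMBIGUOUS = {TokenType.LEFT, TokenType.VAR, TokenType.ASC}
    #   AFTER_TABLE = AMBIGUOUS - {TokenType.LEFT}
    #   # "SELECT * FROM t LEFT JOIN u" must treat LEFT as a join side, never
    #   # as an alias for t, so LEFT is subtracted in that context.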

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }
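
    # Each of the dicts above backs one level of a conventional precedence
    # climber: a level parses its higher-precedence sublevel, then folds any
    # matching operators left-associatively. A self-contained sketch of the
    # pattern (a hypothetical helper, not one of the actual _parse_* methods):
    #
    #   def _parse_binary_level(self, ops, next_level):
    #       this = next_level()
    #       while self._curr and self._curr.token_type in ops:
    #           token_type = self._curr.token_type
    #           self._advance()
    #           this = self.expression(ops[token_type], this=this, expression=next_level())
    #       return this
    #
    # With TERM/FACTOR as defined above, "1 - 2 * 3" becomes
    # Sub(this=1, expression=Mul(this=2, expression=3)).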

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
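
    # Hedged examples of the column operators above (default dialect assumed;
    # the exact nodes depend on STRICT_CAST and JSON_ARROWS_REQUIRE_JSON_TYPE):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   # DCOLON: "x::INT" builds exp.Cast because STRICT_CAST is True here
    #   assert sqlglot.parse_one("SELECT x::INT").find(exp.Cast) is not None
    #
    #   # ARROW: "j -> 'k'" builds exp.JSONExtract with a dialect-built JSON path
    #   assert sqlglot.parse_one("SELECT j -> 'k'").find(exp.JSONExtract) is not None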

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }
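
    # Dispatch sketch: _parse_statement (defined further down) matches one of
    # the token types above and calls the mapped lambda; unmatched statements
    # fall back to expression parsing or, for tokenizer COMMANDS, to exp.Command.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   assert isinstance(sqlglot.parse_one("DROP TABLE t"), exp.Drop)
    #   assert isinstance(sqlglot.parse_one("UPDATE t SET x = 1"), exp.Update)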

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
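
    # Hedged example: property keywords encountered while parsing DDL are
    # routed through the table above, e.g. its "ENGINE" entry (the mysql
    # dialect is assumed to inherit this entry unchanged):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ddl = sqlglot.parse_one("CREATE TABLE t (a INT) ENGINE=InnoDB", read="mysql")
    #   assert ddl.find(exp.EngineProperty) is not None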

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their
            # arguments are in the right order:
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
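
    # Hedged illustration of the canonicalization performed above: both
    # argument orders should end up with the column in `this` and the bucket
    # count in `expression`, i.e. conceptually
    #
    #   bucket(4, col)   -- Hive order, literal first, gets swapped
    #   bucket(col, 4)   -- Trino order, already canonical
    #
    # so downstream code can rely on exp.PartitionedByBucket(this=<col>,
    # expression=<num buckets>) regardless of the source dialect.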

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
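
    # Hedged usage sketch for the two entry points above: parse() yields one
    # tree per statement, while parse_into() targets a specific node type via
    # EXPRESSION_PARSERS (raising ParseError when none of the types fit):
    #
    #   from sqlglot import exp, tokenize
    #   from sqlglot.parser import Parser
    #
    #   parser = Parser()
    #   condition = parser.parse_into(exp.Condition, tokenize("x = 1 AND y"))[0]
    #   assert isinstance(condition, exp.And)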

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
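
    # Note the chunking in _parse above: the token stream is split on
    # top-level semicolons, so one call can carry several statements, and
    # errors are checked per chunk. A hedged illustration of the error-level
    # behavior described in check_errors/raise_error:
    #
    #   import sqlglot
    #   from sqlglot.errors import ErrorLevel, ParseError
    #
    #   try:
    #       sqlglot.parse("SELECT SELECT", error_level=ErrorLevel.RAISE)
    #   except ParseError as e:
    #       print(e.errors[0]["description"])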

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
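
    # Hedged example of the fallback path above: statements that only exist in
    # the tokenizer's COMMANDS set are wrapped as exp.Command (after
    # _warn_unsupported logs a warning) instead of failing outright. Assuming
    # the default dialect routes SHOW this way:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   stmt = sqlglot.parse_one("SHOW TABLES")
    #   assert isinstance(stmt, exp.Command) and stmt.this == "SHOW"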

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
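
    # Hedged example for _parse_drop/_parse_exists above: "IF EXISTS" is
    # folded into the Drop node's `exists` arg rather than kept as tokens:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
    #   assert isinstance(drop, exp.Drop)
    #   assert drop.args["exists"] is True and drop.args["cascade"] is True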
extend_props(self._parse_properties()) 1969 1970 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1971 extend_props(self._parse_properties()) 1972 1973 if not expression: 1974 if self._match(TokenType.COMMAND): 1975 expression = self._parse_as_command(self._prev) 1976 else: 1977 begin = self._match(TokenType.BEGIN) 1978 return_ = self._match_text_seq("RETURN") 1979 1980 if self._match(TokenType.STRING, advance=False): 1981 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1982 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1983 expression = self._parse_string() 1984 extend_props(self._parse_properties()) 1985 else: 1986 expression = self._parse_user_defined_function_expression() 1987 1988 end = self._match_text_seq("END") 1989 1990 if return_: 1991 expression = self.expression(exp.Return, this=expression) 1992 elif create_token.token_type == TokenType.INDEX: 1993 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1994 if not self._match(TokenType.ON): 1995 index = self._parse_id_var() 1996 anonymous = False 1997 else: 1998 index = None 1999 anonymous = True 2000 2001 this = self._parse_index(index=index, anonymous=anonymous) 2002 elif create_token.token_type in self.DB_CREATABLES: 2003 table_parts = self._parse_table_parts( 2004 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2005 ) 2006 2007 # exp.Properties.Location.POST_NAME 2008 self._match(TokenType.COMMA) 2009 extend_props(self._parse_properties(before=True)) 2010 2011 this = self._parse_schema(this=table_parts) 2012 2013 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2014 extend_props(self._parse_properties()) 2015 2016 has_alias = self._match(TokenType.ALIAS) 2017 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2018 # exp.Properties.Location.POST_ALIAS 2019 extend_props(self._parse_properties()) 2020 2021 if create_token.token_type == TokenType.SEQUENCE: 2022 expression = self._parse_types() 2023 extend_props(self._parse_properties()) 2024 else: 2025 expression = self._parse_ddl_select() 2026 2027 # Some dialects also support using a table as an alias instead of a SELECT. 2028 # Here we fallback to this as an alternative. 
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None
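    # Illustrative sketch (assumption: a dialect that accepts WITH (...) table
    # options): the generic key = value branch above yields exp.Property nodes,
    # e.g.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("CREATE TABLE t (a INT) WITH (foo = 'bar')")
    #   # -> exp.Properties containing exp.Property(this=Var("foo"), value='bar')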
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
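    # Illustrative usage (assumption: MySQL dialect): _parse_definer above
    # handles the DEFINER clause of CREATE VIEW, e.g.
    #
    #   import sqlglot
    #   view = sqlglot.parse_one(
    #       "CREATE DEFINER=admin@localhost VIEW v AS SELECT 1", read="mysql"
    #   )
    #   # -> properties include DefinerProperty(this="admin@localhost")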
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )
    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
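    # Illustrative usage (assumption: Postgres dialect): the PARTITION OF /
    # FOR VALUES clauses above, e.g.
    #
    #   import sqlglot
    #   part = sqlglot.parse_one(
    #       "CREATE TABLE p1 PARTITION OF t FOR VALUES FROM (1) TO (10)", read="postgres"
    #   )
    #   # -> properties include a PartitionedOfProperty wrapping a
    #   #    PartitionBoundSpec with from_expressions=[1], to_expressions=[10]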
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )
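    # Illustrative usage (a sketch, default dialect): a plain INSERT flows
    # through the method above, e.g.
    #
    #   import sqlglot
    #   ins = sqlglot.parse_one("INSERT INTO t (a, b) VALUES (1, 2)")
    #   # -> exp.Insert whose "this" is the target table/schema and whose
    #   #    "expression" is the parsed VALUES clause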
    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
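    # Illustrative usage (assumption: Postgres dialect): the ON CONFLICT branch
    # above, e.g.
    #
    #   import sqlglot
    #   up = sqlglot.parse_one(
    #       "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
    #   )
    #   # -> the Insert's "conflict" arg is an exp.OnConflict with conflict_keys=[a]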
    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
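    # Illustrative usage (assumption: MySQL dialect): the multiple-table DELETE
    # syntax handled above, e.g.
    #
    #   import sqlglot
    #   d = sqlglot.parse_one(
    #       "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.x = 1", read="mysql"
    #   )
    #   # -> exp.Delete with tables=[t1]; the FROM/JOIN part is parsed separately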
    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table()
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. join) is following
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports a leading FROM clause, e.g. FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
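    # Illustrative usage (assumption: DuckDB dialect): the leading-FROM branch
    # above, e.g.
    #
    #   import sqlglot
    #   q = sqlglot.parse_one("FROM tbl", read="duckdb")
    #   # -> equivalent to SELECT * FROM tbl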
    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)
    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
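    # Illustrative usage (a sketch, default dialect): join parsing above, e.g.
    #
    #   import sqlglot
    #   j = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    #   # -> the Select carries one exp.Join with side="LEFT" and an "on" condition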
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
3800 3801 params = self._parse_index_params() 3802 3803 return self.expression( 3804 exp.Index, 3805 this=index, 3806 table=table, 3807 unique=unique, 3808 primary=primary, 3809 amp=amp, 3810 params=params, 3811 ) 3812 3813 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3814 hints: t.List[exp.Expression] = [] 3815 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3816 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3817 hints.append( 3818 self.expression( 3819 exp.WithTableHint, 3820 expressions=self._parse_csv( 3821 lambda: self._parse_function() or self._parse_var(any_token=True) 3822 ), 3823 ) 3824 ) 3825 self._match_r_paren() 3826 else: 3827 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3828 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3829 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3830 3831 self._match_set((TokenType.INDEX, TokenType.KEY)) 3832 if self._match(TokenType.FOR): 3833 hint.set("target", self._advance_any() and self._prev.text.upper()) 3834 3835 hint.set("expressions", self._parse_wrapped_id_vars()) 3836 hints.append(hint) 3837 3838 return hints or None 3839 3840 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3841 return ( 3842 (not schema and self._parse_function(optional_parens=False)) 3843 or self._parse_id_var(any_token=False) 3844 or self._parse_string_as_identifier() 3845 or self._parse_placeholder() 3846 ) 3847 3848 def _parse_table_parts( 3849 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3850 ) -> exp.Table: 3851 catalog = None 3852 db = None 3853 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3854 3855 while self._match(TokenType.DOT): 3856 if catalog: 3857 # This allows nesting the table in arbitrarily many dot expressions if needed 3858 table = self.expression( 3859 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3860 ) 3861 else: 3862 catalog = db 3863 db = table 3864 # "" used for tsql FROM a..b case 3865 table = self._parse_table_part(schema=schema) or "" 3866 3867 if ( 3868 wildcard 3869 and self._is_connected() 3870 and (isinstance(table, exp.Identifier) or not table) 3871 and self._match(TokenType.STAR) 3872 ): 3873 if isinstance(table, exp.Identifier): 3874 table.args["this"] += "*" 3875 else: 3876 table = exp.Identifier(this="*") 3877 3878 # We bubble up comments from the Identifier to the Table 3879 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3880 3881 if is_db_reference: 3882 catalog = db 3883 db = table 3884 table = None 3885 3886 if not table and not is_db_reference: 3887 self.raise_error(f"Expected table name but got {self._curr}") 3888 if not db and is_db_reference: 3889 self.raise_error(f"Expected database name but got {self._curr}") 3890 3891 table = self.expression( 3892 exp.Table, 3893 comments=comments, 3894 this=table, 3895 db=db, 3896 catalog=catalog, 3897 ) 3898 3899 changes = self._parse_changes() 3900 if changes: 3901 table.set("changes", changes) 3902 3903 at_before = self._parse_historical_data() 3904 if at_before: 3905 table.set("when", at_before) 3906 3907 pivots = self._parse_pivots() 3908 if pivots: 3909 table.set("pivots", pivots) 3910 3911 return table 3912 3913 def _parse_table( 3914 self, 3915 schema: bool = False, 3916 joins: bool = False, 3917 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3918 parse_bracket: bool = False, 3919 is_db_reference: 
bool = False, 3920 parse_partition: bool = False, 3921 ) -> t.Optional[exp.Expression]: 3922 lateral = self._parse_lateral() 3923 if lateral: 3924 return lateral 3925 3926 unnest = self._parse_unnest() 3927 if unnest: 3928 return unnest 3929 3930 values = self._parse_derived_table_values() 3931 if values: 3932 return values 3933 3934 subquery = self._parse_select(table=True) 3935 if subquery: 3936 if not subquery.args.get("pivots"): 3937 subquery.set("pivots", self._parse_pivots()) 3938 return subquery 3939 3940 bracket = parse_bracket and self._parse_bracket(None) 3941 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3942 3943 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3944 self._parse_table 3945 ) 3946 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3947 3948 only = self._match(TokenType.ONLY) 3949 3950 this = t.cast( 3951 exp.Expression, 3952 bracket 3953 or rows_from 3954 or self._parse_bracket( 3955 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3956 ), 3957 ) 3958 3959 if only: 3960 this.set("only", only) 3961 3962 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3963 self._match_text_seq("*") 3964 3965 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3966 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3967 this.set("partition", self._parse_partition()) 3968 3969 if schema: 3970 return self._parse_schema(this=this) 3971 3972 version = self._parse_version() 3973 3974 if version: 3975 this.set("version", version) 3976 3977 if self.dialect.ALIAS_POST_TABLESAMPLE: 3978 this.set("sample", self._parse_table_sample()) 3979 3980 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3981 if alias: 3982 this.set("alias", alias) 3983 3984 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3985 return self.expression( 3986 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3987 ) 3988 3989 this.set("hints", self._parse_table_hints()) 3990 3991 if not this.args.get("pivots"): 3992 this.set("pivots", self._parse_pivots()) 3993 3994 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3995 this.set("sample", self._parse_table_sample()) 3996 3997 if joins: 3998 for join in self._parse_joins(): 3999 this.append("joins", join) 4000 4001 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4002 this.set("ordinality", True) 4003 this.set("alias", self._parse_table_alias()) 4004 4005 return this 4006 4007 def _parse_version(self) -> t.Optional[exp.Version]: 4008 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4009 this = "TIMESTAMP" 4010 elif self._match(TokenType.VERSION_SNAPSHOT): 4011 this = "VERSION" 4012 else: 4013 return None 4014 4015 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4016 kind = self._prev.text.upper() 4017 start = self._parse_bitwise() 4018 self._match_texts(("TO", "AND")) 4019 end = self._parse_bitwise() 4020 expression: t.Optional[exp.Expression] = self.expression( 4021 exp.Tuple, expressions=[start, end] 4022 ) 4023 elif self._match_text_seq("CONTAINED", "IN"): 4024 kind = "CONTAINED IN" 4025 expression = self.expression( 4026 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4027 ) 4028 elif self._match(TokenType.ALL): 4029 kind = "ALL" 4030 expression = None 4031 else: 4032 self._match_text_seq("AS", "OF") 4033 kind = "AS OF" 4034 expression = self._parse_type() 4035 4036 return 
self.expression(exp.Version, this=this, expression=expression, kind=kind) 4037 4038 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4039 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4040 index = self._index 4041 historical_data = None 4042 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4043 this = self._prev.text.upper() 4044 kind = ( 4045 self._match(TokenType.L_PAREN) 4046 and self._match_texts(self.HISTORICAL_DATA_KIND) 4047 and self._prev.text.upper() 4048 ) 4049 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4050 4051 if expression: 4052 self._match_r_paren() 4053 historical_data = self.expression( 4054 exp.HistoricalData, this=this, kind=kind, expression=expression 4055 ) 4056 else: 4057 self._retreat(index) 4058 4059 return historical_data 4060 4061 def _parse_changes(self) -> t.Optional[exp.Changes]: 4062 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4063 return None 4064 4065 information = self._parse_var(any_token=True) 4066 self._match_r_paren() 4067 4068 return self.expression( 4069 exp.Changes, 4070 information=information, 4071 at_before=self._parse_historical_data(), 4072 end=self._parse_historical_data(), 4073 ) 4074 4075 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4076 if not self._match(TokenType.UNNEST): 4077 return None 4078 4079 expressions = self._parse_wrapped_csv(self._parse_equality) 4080 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4081 4082 alias = self._parse_table_alias() if with_alias else None 4083 4084 if alias: 4085 if self.dialect.UNNEST_COLUMN_ONLY: 4086 if alias.args.get("columns"): 4087 self.raise_error("Unexpected extra column alias in unnest.") 4088 4089 alias.set("columns", [alias.this]) 4090 alias.set("this", None) 4091 4092 columns = alias.args.get("columns") or [] 4093 if offset and len(expressions) < len(columns): 4094 offset = columns.pop() 4095 4096 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4097 self._match(TokenType.ALIAS) 4098 offset = self._parse_id_var( 4099 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4100 ) or exp.to_identifier("offset") 4101 4102 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4103 4104 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4105 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4106 if not is_derived and not ( 4107 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4108 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4109 ): 4110 return None 4111 4112 expressions = self._parse_csv(self._parse_value) 4113 alias = self._parse_table_alias() 4114 4115 if is_derived: 4116 self._match_r_paren() 4117 4118 return self.expression( 4119 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4120 ) 4121 4122 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4123 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4124 as_modifier and self._match_text_seq("USING", "SAMPLE") 4125 ): 4126 return None 4127 4128 bucket_numerator = None 4129 bucket_denominator = None 4130 bucket_field = None 4131 percent = None 4132 size = None 4133 seed = None 4134 4135 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4136 matched_l_paren = self._match(TokenType.L_PAREN) 4137 4138 if self.TABLESAMPLE_CSV: 4139 num = None 4140 expressions = self._parse_csv(self._parse_primary) 4141 else: 
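# Non-CSV dialects parse a single sample size below. Hedged usage sketch:
#
#   import sqlglot
#   ast = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
#
# which should attach an exp.TableSample with percent set to the literal 10
# to the table's "sample" arg (illustrative; exact shape varies by dialect).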
4142 expressions = None
4143 num = (
4144 self._parse_factor()
4145 if self._match(TokenType.NUMBER, advance=False)
4146 else self._parse_primary() or self._parse_placeholder()
4147 )
4148
4149 if self._match_text_seq("BUCKET"):
4150 bucket_numerator = self._parse_number()
4151 self._match_text_seq("OUT", "OF")
4152 bucket_denominator = self._parse_number()
4153 self._match(TokenType.ON)
4154 bucket_field = self._parse_field()
4155 elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
4156 percent = num
4157 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
4158 size = num
4159 else:
4160 percent = num
4161
4162 if matched_l_paren:
4163 self._match_r_paren()
4164
4165 if self._match(TokenType.L_PAREN):
4166 method = self._parse_var(upper=True)
4167 seed = self._match(TokenType.COMMA) and self._parse_number()
4168 self._match_r_paren()
4169 elif self._match_texts(("SEED", "REPEATABLE")):
4170 seed = self._parse_wrapped(self._parse_number)
4171
4172 if not method and self.DEFAULT_SAMPLING_METHOD:
4173 method = exp.var(self.DEFAULT_SAMPLING_METHOD)
4174
4175 return self.expression(
4176 exp.TableSample,
4177 expressions=expressions,
4178 method=method,
4179 bucket_numerator=bucket_numerator,
4180 bucket_denominator=bucket_denominator,
4181 bucket_field=bucket_field,
4182 percent=percent,
4183 size=size,
4184 seed=seed,
4185 )
4186
4187 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
4188 return list(iter(self._parse_pivot, None)) or None
4189
4190 def _parse_joins(self) -> t.Iterator[exp.Join]:
4191 return iter(self._parse_join, None)
4192
4193 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
4194 if not self._match(TokenType.INTO):
4195 return None
4196
4197 return self.expression(
4198 exp.UnpivotColumns,
4199 this=self._match_text_seq("NAME") and self._parse_column(),
4200 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
4201 )
4202
4203 # https://duckdb.org/docs/sql/statements/pivot
4204 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
4205 def _parse_on() -> t.Optional[exp.Expression]:
4206 this = self._parse_bitwise()
4207
4208 if self._match(TokenType.IN):
4209 # PIVOT ... ON col IN (row_val1, row_val2)
4210 return self._parse_in(this)
4211 if self._match(TokenType.ALIAS, advance=False):
4212 # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4213 return self._parse_alias(this) 4214 4215 return this 4216 4217 this = self._parse_table() 4218 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4219 into = self._parse_unpivot_columns() 4220 using = self._match(TokenType.USING) and self._parse_csv( 4221 lambda: self._parse_alias(self._parse_function()) 4222 ) 4223 group = self._parse_group() 4224 4225 return self.expression( 4226 exp.Pivot, 4227 this=this, 4228 expressions=expressions, 4229 using=using, 4230 group=group, 4231 unpivot=is_unpivot, 4232 into=into, 4233 ) 4234 4235 def _parse_pivot_in(self) -> exp.In: 4236 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4237 this = self._parse_select_or_expression() 4238 4239 self._match(TokenType.ALIAS) 4240 alias = self._parse_bitwise() 4241 if alias: 4242 if isinstance(alias, exp.Column) and not alias.db: 4243 alias = alias.this 4244 return self.expression(exp.PivotAlias, this=this, alias=alias) 4245 4246 return this 4247 4248 value = self._parse_column() 4249 4250 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4251 self.raise_error("Expecting IN (") 4252 4253 if self._match(TokenType.ANY): 4254 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4255 else: 4256 exprs = self._parse_csv(_parse_aliased_expression) 4257 4258 self._match_r_paren() 4259 return self.expression(exp.In, this=value, expressions=exprs) 4260 4261 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4262 index = self._index 4263 include_nulls = None 4264 4265 if self._match(TokenType.PIVOT): 4266 unpivot = False 4267 elif self._match(TokenType.UNPIVOT): 4268 unpivot = True 4269 4270 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4271 if self._match_text_seq("INCLUDE", "NULLS"): 4272 include_nulls = True 4273 elif self._match_text_seq("EXCLUDE", "NULLS"): 4274 include_nulls = False 4275 else: 4276 return None 4277 4278 expressions = [] 4279 4280 if not self._match(TokenType.L_PAREN): 4281 self._retreat(index) 4282 return None 4283 4284 if unpivot: 4285 expressions = self._parse_csv(self._parse_column) 4286 else: 4287 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4288 4289 if not expressions: 4290 self.raise_error("Failed to parse PIVOT's aggregation list") 4291 4292 if not self._match(TokenType.FOR): 4293 self.raise_error("Expecting FOR") 4294 4295 fields = [] 4296 while True: 4297 field = self._try_parse(self._parse_pivot_in) 4298 if not field: 4299 break 4300 fields.append(field) 4301 4302 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4303 self._parse_bitwise 4304 ) 4305 4306 group = self._parse_group() 4307 4308 self._match_r_paren() 4309 4310 pivot = self.expression( 4311 exp.Pivot, 4312 expressions=expressions, 4313 fields=fields, 4314 unpivot=unpivot, 4315 include_nulls=include_nulls, 4316 default_on_null=default_on_null, 4317 group=group, 4318 ) 4319 4320 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4321 pivot.set("alias", self._parse_table_alias()) 4322 4323 if not unpivot: 4324 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4325 4326 columns: t.List[exp.Expression] = [] 4327 all_fields = [] 4328 for pivot_field in pivot.fields: 4329 pivot_field_expressions = pivot_field.expressions 4330 4331 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
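# e.g. (Snowflake) PIVOT(SUM(v) FOR k IN (ANY ORDER BY k)): the IN list is
# only known at execution time, so no output column names can be derived here.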
4332 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4333 continue 4334 4335 all_fields.append( 4336 [ 4337 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4338 for fld in pivot_field_expressions 4339 ] 4340 ) 4341 4342 if all_fields: 4343 if names: 4344 all_fields.append(names) 4345 4346 # Generate all possible combinations of the pivot columns 4347 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4348 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4349 for fld_parts_tuple in itertools.product(*all_fields): 4350 fld_parts = list(fld_parts_tuple) 4351 4352 if names and self.PREFIXED_PIVOT_COLUMNS: 4353 # Move the "name" to the front of the list 4354 fld_parts.insert(0, fld_parts.pop(-1)) 4355 4356 columns.append(exp.to_identifier("_".join(fld_parts))) 4357 4358 pivot.set("columns", columns) 4359 4360 return pivot 4361 4362 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4363 return [agg.alias for agg in aggregations if agg.alias] 4364 4365 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4366 if not skip_where_token and not self._match(TokenType.PREWHERE): 4367 return None 4368 4369 return self.expression( 4370 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4371 ) 4372 4373 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4374 if not skip_where_token and not self._match(TokenType.WHERE): 4375 return None 4376 4377 return self.expression( 4378 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4379 ) 4380 4381 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4382 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4383 return None 4384 4385 elements: t.Dict[str, t.Any] = defaultdict(list) 4386 4387 if self._match(TokenType.ALL): 4388 elements["all"] = True 4389 elif self._match(TokenType.DISTINCT): 4390 elements["all"] = False 4391 4392 while True: 4393 index = self._index 4394 4395 elements["expressions"].extend( 4396 self._parse_csv( 4397 lambda: None 4398 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4399 else self._parse_assignment() 4400 ) 4401 ) 4402 4403 before_with_index = self._index 4404 with_prefix = self._match(TokenType.WITH) 4405 4406 if self._match(TokenType.ROLLUP): 4407 elements["rollup"].append( 4408 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4409 ) 4410 elif self._match(TokenType.CUBE): 4411 elements["cube"].append( 4412 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4413 ) 4414 elif self._match(TokenType.GROUPING_SETS): 4415 elements["grouping_sets"].append( 4416 self.expression( 4417 exp.GroupingSets, 4418 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4419 ) 4420 ) 4421 elif self._match_text_seq("TOTALS"): 4422 elements["totals"] = True # type: ignore 4423 4424 if before_with_index <= self._index <= before_with_index + 1: 4425 self._retreat(before_with_index) 4426 break 4427 4428 if index == self._index: 4429 break 4430 4431 return self.expression(exp.Group, **elements) # type: ignore 4432 4433 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4434 return self.expression( 4435 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4436 ) 4437 4438 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4439 if 
self._match(TokenType.L_PAREN): 4440 grouping_set = self._parse_csv(self._parse_column) 4441 self._match_r_paren() 4442 return self.expression(exp.Tuple, expressions=grouping_set) 4443 4444 return self._parse_column() 4445 4446 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4447 if not skip_having_token and not self._match(TokenType.HAVING): 4448 return None 4449 return self.expression(exp.Having, this=self._parse_assignment()) 4450 4451 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4452 if not self._match(TokenType.QUALIFY): 4453 return None 4454 return self.expression(exp.Qualify, this=self._parse_assignment()) 4455 4456 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4457 if skip_start_token: 4458 start = None 4459 elif self._match(TokenType.START_WITH): 4460 start = self._parse_assignment() 4461 else: 4462 return None 4463 4464 self._match(TokenType.CONNECT_BY) 4465 nocycle = self._match_text_seq("NOCYCLE") 4466 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4467 exp.Prior, this=self._parse_bitwise() 4468 ) 4469 connect = self._parse_assignment() 4470 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4471 4472 if not start and self._match(TokenType.START_WITH): 4473 start = self._parse_assignment() 4474 4475 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4476 4477 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4478 this = self._parse_id_var(any_token=True) 4479 if self._match(TokenType.ALIAS): 4480 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4481 return this 4482 4483 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4484 if self._match_text_seq("INTERPOLATE"): 4485 return self._parse_wrapped_csv(self._parse_name_as_expression) 4486 return None 4487 4488 def _parse_order( 4489 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4490 ) -> t.Optional[exp.Expression]: 4491 siblings = None 4492 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4493 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4494 return this 4495 4496 siblings = True 4497 4498 return self.expression( 4499 exp.Order, 4500 this=this, 4501 expressions=self._parse_csv(self._parse_ordered), 4502 siblings=siblings, 4503 ) 4504 4505 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4506 if not self._match(token): 4507 return None 4508 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4509 4510 def _parse_ordered( 4511 self, parse_method: t.Optional[t.Callable] = None 4512 ) -> t.Optional[exp.Ordered]: 4513 this = parse_method() if parse_method else self._parse_assignment() 4514 if not this: 4515 return None 4516 4517 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4518 this = exp.var("ALL") 4519 4520 asc = self._match(TokenType.ASC) 4521 desc = self._match(TokenType.DESC) or (asc and False) 4522 4523 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4524 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4525 4526 nulls_first = is_nulls_first or False 4527 explicitly_null_ordered = is_nulls_first or is_nulls_last 4528 4529 if ( 4530 not explicitly_null_ordered 4531 and ( 4532 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4533 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4534 ) 4535 and self.dialect.NULL_ORDERING != "nulls_are_last" 4536 ): 4537 
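# e.g. under NULL_ORDERING == "nulls_are_small", a plain ascending sort
# implicitly puts NULLs first, so we record nulls_first explicitly to keep
# transpilation across dialects stable.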
nulls_first = True 4538 4539 if self._match_text_seq("WITH", "FILL"): 4540 with_fill = self.expression( 4541 exp.WithFill, 4542 **{ # type: ignore 4543 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4544 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4545 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4546 "interpolate": self._parse_interpolate(), 4547 }, 4548 ) 4549 else: 4550 with_fill = None 4551 4552 return self.expression( 4553 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4554 ) 4555 4556 def _parse_limit_options(self) -> exp.LimitOptions: 4557 percent = self._match(TokenType.PERCENT) 4558 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4559 self._match_text_seq("ONLY") 4560 with_ties = self._match_text_seq("WITH", "TIES") 4561 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4562 4563 def _parse_limit( 4564 self, 4565 this: t.Optional[exp.Expression] = None, 4566 top: bool = False, 4567 skip_limit_token: bool = False, 4568 ) -> t.Optional[exp.Expression]: 4569 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4570 comments = self._prev_comments 4571 if top: 4572 limit_paren = self._match(TokenType.L_PAREN) 4573 expression = self._parse_term() if limit_paren else self._parse_number() 4574 4575 if limit_paren: 4576 self._match_r_paren() 4577 4578 limit_options = self._parse_limit_options() 4579 else: 4580 limit_options = None 4581 expression = self._parse_term() 4582 4583 if self._match(TokenType.COMMA): 4584 offset = expression 4585 expression = self._parse_term() 4586 else: 4587 offset = None 4588 4589 limit_exp = self.expression( 4590 exp.Limit, 4591 this=this, 4592 expression=expression, 4593 offset=offset, 4594 comments=comments, 4595 limit_options=limit_options, 4596 expressions=self._parse_limit_by(), 4597 ) 4598 4599 return limit_exp 4600 4601 if self._match(TokenType.FETCH): 4602 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4603 direction = self._prev.text.upper() if direction else "FIRST" 4604 4605 count = self._parse_field(tokens=self.FETCH_TOKENS) 4606 4607 return self.expression( 4608 exp.Fetch, 4609 direction=direction, 4610 count=count, 4611 limit_options=self._parse_limit_options(), 4612 ) 4613 4614 return this 4615 4616 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4617 if not self._match(TokenType.OFFSET): 4618 return this 4619 4620 count = self._parse_term() 4621 self._match_set((TokenType.ROW, TokenType.ROWS)) 4622 4623 return self.expression( 4624 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4625 ) 4626 4627 def _can_parse_limit_or_offset(self) -> bool: 4628 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4629 return False 4630 4631 index = self._index 4632 result = bool( 4633 self._try_parse(self._parse_limit, retreat=True) 4634 or self._try_parse(self._parse_offset, retreat=True) 4635 ) 4636 self._retreat(index) 4637 return result 4638 4639 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4640 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4641 4642 def _parse_locks(self) -> t.List[exp.Lock]: 4643 locks = [] 4644 while True: 4645 if self._match_text_seq("FOR", "UPDATE"): 4646 update = True 4647 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4648 "LOCK", "IN", "SHARE", "MODE" 4649 ): 4650 update = False 4651 else: 4652 break 
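# Hedged sketch of what one loop iteration consumes, e.g.
# "FOR UPDATE OF t SKIP LOCKED" -> exp.Lock(update=True, expressions=[<t>],
# wait=False); several lock clauses may stack into the returned list.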
4653 4654 expressions = None 4655 if self._match_text_seq("OF"): 4656 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4657 4658 wait: t.Optional[bool | exp.Expression] = None 4659 if self._match_text_seq("NOWAIT"): 4660 wait = True 4661 elif self._match_text_seq("WAIT"): 4662 wait = self._parse_primary() 4663 elif self._match_text_seq("SKIP", "LOCKED"): 4664 wait = False 4665 4666 locks.append( 4667 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4668 ) 4669 4670 return locks 4671 4672 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4673 start = self._index 4674 _, side_token, kind_token = self._parse_join_parts() 4675 4676 side = side_token.text if side_token else None 4677 kind = kind_token.text if kind_token else None 4678 4679 if not self._match_set(self.SET_OPERATIONS): 4680 self._retreat(start) 4681 return None 4682 4683 token_type = self._prev.token_type 4684 4685 if token_type == TokenType.UNION: 4686 operation: t.Type[exp.SetOperation] = exp.Union 4687 elif token_type == TokenType.EXCEPT: 4688 operation = exp.Except 4689 else: 4690 operation = exp.Intersect 4691 4692 comments = self._prev.comments 4693 4694 if self._match(TokenType.DISTINCT): 4695 distinct: t.Optional[bool] = True 4696 elif self._match(TokenType.ALL): 4697 distinct = False 4698 else: 4699 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4700 if distinct is None: 4701 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4702 4703 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4704 "STRICT", "CORRESPONDING" 4705 ) 4706 if self._match_text_seq("CORRESPONDING"): 4707 by_name = True 4708 if not side and not kind: 4709 kind = "INNER" 4710 4711 on_column_list = None 4712 if by_name and self._match_texts(("ON", "BY")): 4713 on_column_list = self._parse_wrapped_csv(self._parse_column) 4714 4715 expression = self._parse_select(nested=True, parse_set_operation=False) 4716 4717 return self.expression( 4718 operation, 4719 comments=comments, 4720 this=this, 4721 distinct=distinct, 4722 by_name=by_name, 4723 expression=expression, 4724 side=side, 4725 kind=kind, 4726 on=on_column_list, 4727 ) 4728 4729 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4730 while this: 4731 setop = self.parse_set_operation(this) 4732 if not setop: 4733 break 4734 this = setop 4735 4736 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4737 expression = this.expression 4738 4739 if expression: 4740 for arg in self.SET_OP_MODIFIERS: 4741 expr = expression.args.get(arg) 4742 if expr: 4743 this.set(arg, expr.pop()) 4744 4745 return this 4746 4747 def _parse_expression(self) -> t.Optional[exp.Expression]: 4748 return self._parse_alias(self._parse_assignment()) 4749 4750 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4751 this = self._parse_disjunction() 4752 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4753 # This allows us to parse <non-identifier token> := <expr> 4754 this = exp.column( 4755 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4756 ) 4757 4758 while self._match_set(self.ASSIGNMENT): 4759 if isinstance(this, exp.Column) and len(this.parts) == 1: 4760 this = this.this 4761 4762 this = self.expression( 4763 self.ASSIGNMENT[self._prev.token_type], 4764 this=this, 4765 comments=self._prev_comments, 4766 expression=self._parse_assignment(), 4767 ) 4768 4769 
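# Note: the recursive _parse_assignment() call for the right-hand side makes
# assignment right-associative, e.g. "a := b := 1" parses as a := (b := 1).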
return this 4770 4771 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4772 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4773 4774 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4775 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4776 4777 def _parse_equality(self) -> t.Optional[exp.Expression]: 4778 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4779 4780 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4781 return self._parse_tokens(self._parse_range, self.COMPARISON) 4782 4783 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4784 this = this or self._parse_bitwise() 4785 negate = self._match(TokenType.NOT) 4786 4787 if self._match_set(self.RANGE_PARSERS): 4788 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4789 if not expression: 4790 return this 4791 4792 this = expression 4793 elif self._match(TokenType.ISNULL): 4794 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4795 4796 # Postgres supports ISNULL and NOTNULL for conditions. 4797 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4798 if self._match(TokenType.NOTNULL): 4799 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4800 this = self.expression(exp.Not, this=this) 4801 4802 if negate: 4803 this = self._negate_range(this) 4804 4805 if self._match(TokenType.IS): 4806 this = self._parse_is(this) 4807 4808 return this 4809 4810 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4811 if not this: 4812 return this 4813 4814 return self.expression(exp.Not, this=this) 4815 4816 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4817 index = self._index - 1 4818 negate = self._match(TokenType.NOT) 4819 4820 if self._match_text_seq("DISTINCT", "FROM"): 4821 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4822 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4823 4824 if self._match(TokenType.JSON): 4825 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4826 4827 if self._match_text_seq("WITH"): 4828 _with = True 4829 elif self._match_text_seq("WITHOUT"): 4830 _with = False 4831 else: 4832 _with = None 4833 4834 unique = self._match(TokenType.UNIQUE) 4835 self._match_text_seq("KEYS") 4836 expression: t.Optional[exp.Expression] = self.expression( 4837 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4838 ) 4839 else: 4840 expression = self._parse_primary() or self._parse_null() 4841 if not expression: 4842 self._retreat(index) 4843 return None 4844 4845 this = self.expression(exp.Is, this=this, expression=expression) 4846 return self.expression(exp.Not, this=this) if negate else this 4847 4848 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4849 unnest = self._parse_unnest(with_alias=False) 4850 if unnest: 4851 this = self.expression(exp.In, this=this, unnest=unnest) 4852 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4853 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4854 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4855 4856 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4857 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4858 else: 4859 this = self.expression(exp.In, this=this, expressions=expressions) 4860 4861 if 
matched_l_paren: 4862 self._match_r_paren(this) 4863 elif not self._match(TokenType.R_BRACKET, expression=this): 4864 self.raise_error("Expecting ]") 4865 else: 4866 this = self.expression(exp.In, this=this, field=self._parse_column()) 4867 4868 return this 4869 4870 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4871 low = self._parse_bitwise() 4872 self._match(TokenType.AND) 4873 high = self._parse_bitwise() 4874 return self.expression(exp.Between, this=this, low=low, high=high) 4875 4876 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4877 if not self._match(TokenType.ESCAPE): 4878 return this 4879 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4880 4881 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4882 index = self._index 4883 4884 if not self._match(TokenType.INTERVAL) and match_interval: 4885 return None 4886 4887 if self._match(TokenType.STRING, advance=False): 4888 this = self._parse_primary() 4889 else: 4890 this = self._parse_term() 4891 4892 if not this or ( 4893 isinstance(this, exp.Column) 4894 and not this.table 4895 and not this.this.quoted 4896 and this.name.upper() == "IS" 4897 ): 4898 self._retreat(index) 4899 return None 4900 4901 unit = self._parse_function() or ( 4902 not self._match(TokenType.ALIAS, advance=False) 4903 and self._parse_var(any_token=True, upper=True) 4904 ) 4905 4906 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4907 # each INTERVAL expression into this canonical form so it's easy to transpile 4908 if this and this.is_number: 4909 this = exp.Literal.string(this.to_py()) 4910 elif this and this.is_string: 4911 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4912 if parts and unit: 4913 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4914 unit = None 4915 self._retreat(self._index - 1) 4916 4917 if len(parts) == 1: 4918 this = exp.Literal.string(parts[0][0]) 4919 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4920 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4921 unit = self.expression( 4922 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4923 ) 4924 4925 interval = self.expression(exp.Interval, this=this, unit=unit) 4926 4927 index = self._index 4928 self._match(TokenType.PLUS) 4929 4930 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4931 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4932 return self.expression( 4933 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4934 ) 4935 4936 self._retreat(index) 4937 return interval 4938 4939 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4940 this = self._parse_term() 4941 4942 while True: 4943 if self._match_set(self.BITWISE): 4944 this = self.expression( 4945 self.BITWISE[self._prev.token_type], 4946 this=this, 4947 expression=self._parse_term(), 4948 ) 4949 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4950 this = self.expression( 4951 exp.DPipe, 4952 this=this, 4953 expression=self._parse_term(), 4954 safe=not self.dialect.STRICT_STRING_CONCAT, 4955 ) 4956 elif self._match(TokenType.DQMARK): 4957 this = self.expression( 4958 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4959 ) 4960 elif self._match_pair(TokenType.LT, TokenType.LT): 4961 this = self.expression( 4962 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4963 ) 4964 elif self._match_pair(TokenType.GT, TokenType.GT): 4965 this = self.expression( 4966 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4967 ) 4968 else: 4969 break 4970 4971 return this 4972 4973 def _parse_term(self) -> t.Optional[exp.Expression]: 4974 this = self._parse_factor() 4975 4976 while self._match_set(self.TERM): 4977 klass = self.TERM[self._prev.token_type] 4978 comments = self._prev_comments 4979 expression = self._parse_factor() 4980 4981 this = self.expression(klass, this=this, comments=comments, expression=expression) 4982 4983 if isinstance(this, exp.Collate): 4984 expr = this.expression 4985 4986 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4987 # fallback to Identifier / Var 4988 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4989 ident = expr.this 4990 if isinstance(ident, exp.Identifier): 4991 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4992 4993 return this 4994 4995 def _parse_factor(self) -> t.Optional[exp.Expression]: 4996 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4997 this = parse_method() 4998 4999 while self._match_set(self.FACTOR): 5000 klass = self.FACTOR[self._prev.token_type] 5001 comments = self._prev_comments 5002 expression = parse_method() 5003 5004 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5005 self._retreat(self._index - 1) 5006 return this 5007 5008 this = self.expression(klass, this=this, comments=comments, expression=expression) 5009 5010 if isinstance(this, exp.Div): 5011 this.args["typed"] = self.dialect.TYPED_DIVISION 5012 this.args["safe"] = self.dialect.SAFE_DIVISION 5013 5014 return this 5015 5016 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5017 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5018 5019 def _parse_unary(self) -> t.Optional[exp.Expression]: 5020 if self._match_set(self.UNARY_PARSERS): 5021 return self.UNARY_PARSERS[self._prev.token_type](self) 5022 return self._parse_at_time_zone(self._parse_type()) 5023 5024 def _parse_type( 5025 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5026 ) -> t.Optional[exp.Expression]: 5027 interval = parse_interval and self._parse_interval() 5028 if interval: 5029 return interval 5030 5031 index = self._index 5032 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5033 
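# (Descriptive note) This method sits near the bottom of the recursive-descent
# chain: _parse_assignment -> disjunction -> conjunction -> equality ->
# comparison -> range -> bitwise -> term -> factor -> unary -> type/column,
# so e.g. "1 + 2 * 3" parses as Add(1, Mul(2, 3)).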
5034 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
5035 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
5036 if isinstance(data_type, exp.Cast):
5037 # This constructor can contain ops directly after it, for instance struct unnesting:
5038 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
5039 return self._parse_column_ops(data_type)
5040
5041 if data_type:
5042 index2 = self._index
5043 this = self._parse_primary()
5044
5045 if isinstance(this, exp.Literal):
5046 this = self._parse_column_ops(this)
5047
5048 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
5049 if parser:
5050 return parser(self, this, data_type)
5051
5052 return self.expression(exp.Cast, this=this, to=data_type)
5053
5054 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
5055 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
5056 #
5057 # If the index difference here is greater than 1, that means the parser itself must have
5058 # consumed additional tokens such as the DECIMAL scale and precision in the above example.
5059 #
5060 # If it's not greater than 1, then it must be 1, because we've consumed at least the type
5061 # keyword, meaning that the expressions arg of the DataType must have gotten set by a
5062 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
5063 # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
5064 #
5065 # In these cases, we don't really want to return the converted type, but instead retreat
5066 # and try to parse a Column or Identifier in the section below.
5067 if data_type.expressions and index2 - index > 1:
5068 self._retreat(index2)
5069 return self._parse_column_ops(data_type)
5070
5071 self._retreat(index)
5072
5073 if fallback_to_identifier:
5074 return self._parse_id_var()
5075
5076 this = self._parse_column()
5077 return this and self._parse_column_ops(this)
5078
5079 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
5080 this = self._parse_type()
5081 if not this:
5082 return None
5083
5084 if isinstance(this, exp.Column) and not this.table:
5085 this = exp.var(this.name.upper())
5086
5087 return self.expression(
5088 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
5089 )
5090
5091 def _parse_types(
5092 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
5093 ) -> t.Optional[exp.Expression]:
5094 index = self._index
5095
5096 this: t.Optional[exp.Expression] = None
5097 prefix = self._match_text_seq("SYSUDTLIB", ".")
5098
5099 if not self._match_set(self.TYPE_TOKENS):
5100 identifier = allow_identifiers and self._parse_id_var(
5101 any_token=False, tokens=(TokenType.VAR,)
5102 )
5103 if isinstance(identifier, exp.Identifier):
5104 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))
5105
5106 if len(tokens) != 1:
5107 self.raise_error("Unexpected identifier", self._prev)
5108
5109 if tokens[0].token_type in self.TYPE_TOKENS:
5110 self._prev = tokens[0]
5111 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
5112 type_name = identifier.name
5113
5114 while self._match(TokenType.DOT):
5115 type_name = f"{type_name}.{self._advance_any() and self._prev.text}"
5116
5117 this = exp.DataType.build(type_name, udt=True)
5118 else:
5119 self._retreat(self._index - 1)
5120 return None
5121 else:
5122 return None
5123
5124 type_token = self._prev.token_type
5125
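# From here on we specialize on the matched type token. Illustrative inputs
# that reach this point (hedged, dialect-specific): Materialize's
# MAP[TEXT => INT], BigQuery's STRUCT<a INT64>, ClickHouse's Nullable(Int32),
# and Postgres's INT ARRAY[3] / INT[3] fixed-size array suffixes.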
5126 if type_token == TokenType.PSEUDO_TYPE: 5127 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5128 5129 if type_token == TokenType.OBJECT_IDENTIFIER: 5130 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5131 5132 # https://materialize.com/docs/sql/types/map/ 5133 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5134 key_type = self._parse_types( 5135 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5136 ) 5137 if not self._match(TokenType.FARROW): 5138 self._retreat(index) 5139 return None 5140 5141 value_type = self._parse_types( 5142 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5143 ) 5144 if not self._match(TokenType.R_BRACKET): 5145 self._retreat(index) 5146 return None 5147 5148 return exp.DataType( 5149 this=exp.DataType.Type.MAP, 5150 expressions=[key_type, value_type], 5151 nested=True, 5152 prefix=prefix, 5153 ) 5154 5155 nested = type_token in self.NESTED_TYPE_TOKENS 5156 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5157 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5158 expressions = None 5159 maybe_func = False 5160 5161 if self._match(TokenType.L_PAREN): 5162 if is_struct: 5163 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5164 elif nested: 5165 expressions = self._parse_csv( 5166 lambda: self._parse_types( 5167 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5168 ) 5169 ) 5170 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5171 this = expressions[0] 5172 this.set("nullable", True) 5173 self._match_r_paren() 5174 return this 5175 elif type_token in self.ENUM_TYPE_TOKENS: 5176 expressions = self._parse_csv(self._parse_equality) 5177 elif is_aggregate: 5178 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5179 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5180 ) 5181 if not func_or_ident: 5182 return None 5183 expressions = [func_or_ident] 5184 if self._match(TokenType.COMMA): 5185 expressions.extend( 5186 self._parse_csv( 5187 lambda: self._parse_types( 5188 check_func=check_func, 5189 schema=schema, 5190 allow_identifiers=allow_identifiers, 5191 ) 5192 ) 5193 ) 5194 else: 5195 expressions = self._parse_csv(self._parse_type_size) 5196 5197 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5198 if type_token == TokenType.VECTOR and len(expressions) == 2: 5199 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5200 5201 if not expressions or not self._match(TokenType.R_PAREN): 5202 self._retreat(index) 5203 return None 5204 5205 maybe_func = True 5206 5207 values: t.Optional[t.List[exp.Expression]] = None 5208 5209 if nested and self._match(TokenType.LT): 5210 if is_struct: 5211 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5212 else: 5213 expressions = self._parse_csv( 5214 lambda: self._parse_types( 5215 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5216 ) 5217 ) 5218 5219 if not self._match(TokenType.GT): 5220 self.raise_error("Expecting >") 5221 5222 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5223 values = self._parse_csv(self._parse_assignment) 5224 if not values and is_struct: 5225 values = None 5226 self._retreat(self._index - 1) 5227 else: 5228 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5229 5230 if type_token in self.TIMESTAMPS: 5231 if self._match_text_seq("WITH", "TIME", 
"ZONE"): 5232 maybe_func = False 5233 tz_type = ( 5234 exp.DataType.Type.TIMETZ 5235 if type_token in self.TIMES 5236 else exp.DataType.Type.TIMESTAMPTZ 5237 ) 5238 this = exp.DataType(this=tz_type, expressions=expressions) 5239 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5240 maybe_func = False 5241 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5242 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5243 maybe_func = False 5244 elif type_token == TokenType.INTERVAL: 5245 unit = self._parse_var(upper=True) 5246 if unit: 5247 if self._match_text_seq("TO"): 5248 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5249 5250 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5251 else: 5252 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5253 elif type_token == TokenType.VOID: 5254 this = exp.DataType(this=exp.DataType.Type.NULL) 5255 5256 if maybe_func and check_func: 5257 index2 = self._index 5258 peek = self._parse_string() 5259 5260 if not peek: 5261 self._retreat(index) 5262 return None 5263 5264 self._retreat(index2) 5265 5266 if not this: 5267 if self._match_text_seq("UNSIGNED"): 5268 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5269 if not unsigned_type_token: 5270 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5271 5272 type_token = unsigned_type_token or type_token 5273 5274 this = exp.DataType( 5275 this=exp.DataType.Type[type_token.value], 5276 expressions=expressions, 5277 nested=nested, 5278 prefix=prefix, 5279 ) 5280 5281 # Empty arrays/structs are allowed 5282 if values is not None: 5283 cls = exp.Struct if is_struct else exp.Array 5284 this = exp.cast(cls(expressions=values), this, copy=False) 5285 5286 elif expressions: 5287 this.set("expressions", expressions) 5288 5289 # https://materialize.com/docs/sql/types/list/#type-name 5290 while self._match(TokenType.LIST): 5291 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5292 5293 index = self._index 5294 5295 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5296 matched_array = self._match(TokenType.ARRAY) 5297 5298 while self._curr: 5299 datatype_token = self._prev.token_type 5300 matched_l_bracket = self._match(TokenType.L_BRACKET) 5301 5302 if (not matched_l_bracket and not matched_array) or ( 5303 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5304 ): 5305 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5306 # not to be confused with the fixed size array parsing 5307 break 5308 5309 matched_array = False 5310 values = self._parse_csv(self._parse_assignment) or None 5311 if ( 5312 values 5313 and not schema 5314 and ( 5315 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5316 ) 5317 ): 5318 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5319 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5320 self._retreat(index) 5321 break 5322 5323 this = exp.DataType( 5324 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5325 ) 5326 self._match(TokenType.R_BRACKET) 5327 5328 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5329 converter = self.TYPE_CONVERTERS.get(this.this) 5330 if converter: 5331 this = converter(t.cast(exp.DataType, this)) 5332 5333 return this 5334 5335 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5336 index = self._index 5337 5338 if ( 5339 self._curr 5340 and self._next 5341 and self._curr.token_type in self.TYPE_TOKENS 5342 and self._next.token_type in self.TYPE_TOKENS 5343 ): 5344 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5345 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5346 this = self._parse_id_var() 5347 else: 5348 this = ( 5349 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5350 or self._parse_id_var() 5351 ) 5352 5353 self._match(TokenType.COLON) 5354 5355 if ( 5356 type_required 5357 and not isinstance(this, exp.DataType) 5358 and not self._match_set(self.TYPE_TOKENS, advance=False) 5359 ): 5360 self._retreat(index) 5361 return self._parse_types() 5362 5363 return self._parse_column_def(this) 5364 5365 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5366 if not self._match_text_seq("AT", "TIME", "ZONE"): 5367 return this 5368 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5369 5370 def _parse_column(self) -> t.Optional[exp.Expression]: 5371 this = self._parse_column_reference() 5372 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5373 5374 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5375 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5376 5377 return column 5378 5379 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5380 this = self._parse_field() 5381 if ( 5382 not this 5383 and self._match(TokenType.VALUES, advance=False) 5384 and self.VALUES_FOLLOWED_BY_PAREN 5385 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5386 ): 5387 this = self._parse_id_var() 5388 5389 if isinstance(this, exp.Identifier): 5390 # We bubble up comments from the Identifier to the Column 5391 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5392 5393 return this 5394 5395 def _parse_colon_as_variant_extract( 5396 self, this: t.Optional[exp.Expression] 5397 ) -> t.Optional[exp.Expression]: 5398 casts = [] 5399 json_path = [] 5400 escape = None 5401 5402 while self._match(TokenType.COLON): 5403 start_index = self._index 5404 5405 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5406 path = self._parse_column_ops( 5407 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5408 ) 5409 5410 # The cast :: operator has a lower precedence than the extraction operator :, so 5411 # we rearrange the AST appropriately to avoid casting the JSON path 5412 while isinstance(path, exp.Cast): 5413 casts.append(path.to) 5414 path = path.this 5415 5416 if casts: 5417 dcolon_offset = next( 5418 i 5419 for i, t in enumerate(self._tokens[start_index:]) 5420 if t.token_type == TokenType.DCOLON 
5421 ) 5422 end_token = self._tokens[start_index + dcolon_offset - 1] 5423 else: 5424 end_token = self._prev 5425 5426 if path: 5427 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5428 # it'll roundtrip to a string literal in GET_PATH 5429 if isinstance(path, exp.Identifier) and path.quoted: 5430 escape = True 5431 5432 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5433 5434 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5435 # Databricks transforms it back to the colon/dot notation 5436 if json_path: 5437 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5438 5439 if json_path_expr: 5440 json_path_expr.set("escape", escape) 5441 5442 this = self.expression( 5443 exp.JSONExtract, 5444 this=this, 5445 expression=json_path_expr, 5446 variant_extract=True, 5447 ) 5448 5449 while casts: 5450 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5451 5452 return this 5453 5454 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5455 return self._parse_types() 5456 5457 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5458 this = self._parse_bracket(this) 5459 5460 while self._match_set(self.COLUMN_OPERATORS): 5461 op_token = self._prev.token_type 5462 op = self.COLUMN_OPERATORS.get(op_token) 5463 5464 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5465 field = self._parse_dcolon() 5466 if not field: 5467 self.raise_error("Expected type") 5468 elif op and self._curr: 5469 field = self._parse_column_reference() or self._parse_bracket() 5470 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5471 field = self._parse_column_ops(field) 5472 else: 5473 field = self._parse_field(any_token=True, anonymous_func=True) 5474 5475 if isinstance(field, (exp.Func, exp.Window)) and this: 5476 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5477 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5478 this = exp.replace_tree( 5479 this, 5480 lambda n: ( 5481 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5482 if n.table 5483 else n.this 5484 ) 5485 if isinstance(n, exp.Column) 5486 else n, 5487 ) 5488 5489 if op: 5490 this = op(self, this, field) 5491 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5492 this = self.expression( 5493 exp.Column, 5494 comments=this.comments, 5495 this=field, 5496 table=this.this, 5497 db=this.args.get("table"), 5498 catalog=this.args.get("db"), 5499 ) 5500 elif isinstance(field, exp.Window): 5501 # Move the exp.Dot's to the window's function 5502 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5503 field.set("this", window_func) 5504 this = field 5505 else: 5506 this = self.expression(exp.Dot, this=this, expression=field) 5507 5508 if field and field.comments: 5509 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5510 5511 this = self._parse_bracket(this) 5512 5513 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5514 5515 def _parse_primary(self) -> t.Optional[exp.Expression]: 5516 if self._match_set(self.PRIMARY_PARSERS): 5517 token_type = self._prev.token_type 5518 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5519 5520 if token_type == TokenType.STRING: 5521 expressions = [primary] 5522 while self._match(TokenType.STRING): 5523 expressions.append(exp.Literal.string(self._prev.text)) 5524 5525 if len(expressions) > 1: 5526 return self.expression(exp.Concat, expressions=expressions) 5527 5528 return primary 5529 5530 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5531 return exp.Literal.number(f"0.{self._prev.text}") 5532 5533 if self._match(TokenType.L_PAREN): 5534 comments = self._prev_comments 5535 query = self._parse_select() 5536 5537 if query: 5538 expressions = [query] 5539 else: 5540 expressions = self._parse_expressions() 5541 5542 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5543 5544 if not this and self._match(TokenType.R_PAREN, advance=False): 5545 this = self.expression(exp.Tuple) 5546 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5547 this = self._parse_subquery(this=this, parse_alias=False) 5548 elif isinstance(this, exp.Subquery): 5549 this = self._parse_subquery( 5550 this=self._parse_set_operations(this), parse_alias=False 5551 ) 5552 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5553 this = self.expression(exp.Tuple, expressions=expressions) 5554 else: 5555 this = self.expression(exp.Paren, this=this) 5556 5557 if this: 5558 this.add_comments(comments) 5559 5560 self._match_r_paren(expression=this) 5561 return this 5562 5563 return None 5564 5565 def _parse_field( 5566 self, 5567 any_token: bool = False, 5568 tokens: t.Optional[t.Collection[TokenType]] = None, 5569 anonymous_func: bool = False, 5570 ) -> t.Optional[exp.Expression]: 5571 if anonymous_func: 5572 field = ( 5573 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5574 or self._parse_primary() 5575 ) 5576 else: 5577 field = self._parse_primary() or self._parse_function( 5578 anonymous=anonymous_func, any_token=any_token 5579 ) 5580 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5581 5582 def _parse_function( 5583 self, 5584 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5585 anonymous: bool = False, 5586 optional_parens: 

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
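
    # NOTE (illustrative sketch, not original source commentary): a name registered in
    # self.FUNCTIONS, e.g. LOG(2, 8), is built into its typed node via the matching
    # builder, whereas an unregistered name such as MY_UDF(1) falls through to
    # exp.Anonymous; a trailing /* sqlglot.anonymous */ comment forces the same
    # fallback even for known functions, as handled above.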

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
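
    # NOTE (illustrative sketch, not original source commentary): in dialects whose
    # LAMBDAS table maps an arrow token, `(x, y) -> x + y` is parsed above into a
    # lambda over two arguments; when no lambda operator follows, the parser retreats
    # and re-parses the tokens as an ordinary (possibly aliased) expression.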

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
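
    # NOTE (illustrative sketch, not original source commentary): a column definition
    # such as `id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1)` is
    # covered by _parse_generated_as_identity above, while a bare `NOT NULL` is turned
    # into an exp.NotNullColumnConstraint by _parse_not_constraint.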

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )
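
    # NOTE (illustrative sketch, not original source commentary): for a clause like
    # `FOREIGN KEY (a, b) REFERENCES t (x, y) ON DELETE CASCADE`, the loop above
    # collects each ON DELETE/ON UPDATE action into on_options, e.g. delete="CASCADE".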

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)
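
    # NOTE (illustrative sketch, not original source commentary): `arr[1]` becomes an
    # exp.Bracket whose index is shifted by the dialect's INDEX_OFFSET, `{'a': 1}`
    # becomes an exp.Struct of PropertyEQ nodes, and `{d'2024-01-01'}` is routed to
    # _parse_odbc_datetime_literal above.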

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )
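
    # NOTE (illustrative sketch, not original source commentary): a call such as
    # `CAST(x AS DATE FORMAT 'yyyy-mm-dd')` takes the FORMAT branch above and is
    # rewritten into exp.StrToDate (or exp.StrToTime for other temporal types), with
    # the format string translated through the dialect's FORMAT/TIME mappings.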

    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )
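
    # NOTE (illustrative sketch, not original source commentary): both the Postgres
    # form `STRING_AGG(x, ',' ORDER BY y)` and the `LISTAGG(x, ',') WITHIN GROUP
    # (ORDER BY y)` form normalize to exp.GroupConcat with the ORDER BY folded into
    # `this`, which is what makes transpilation to MySQL / SQLite straightforward.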

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
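
        For example (an illustrative sketch of the second variant):

            DECODE(x, 1, 'one', NULL, 'n/a', 'other')

        is parsed into (roughly):

            CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'n/a' ELSE 'other' END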
6490 """ 6491 args = self._parse_csv(self._parse_assignment) 6492 6493 if len(args) < 3: 6494 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6495 6496 expression, *expressions = args 6497 if not expression: 6498 return None 6499 6500 ifs = [] 6501 for search, result in zip(expressions[::2], expressions[1::2]): 6502 if not search or not result: 6503 return None 6504 6505 if isinstance(search, exp.Literal): 6506 ifs.append( 6507 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6508 ) 6509 elif isinstance(search, exp.Null): 6510 ifs.append( 6511 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6512 ) 6513 else: 6514 cond = exp.or_( 6515 exp.EQ(this=expression.copy(), expression=search), 6516 exp.and_( 6517 exp.Is(this=expression.copy(), expression=exp.Null()), 6518 exp.Is(this=search.copy(), expression=exp.Null()), 6519 copy=False, 6520 ), 6521 copy=False, 6522 ) 6523 ifs.append(exp.If(this=cond, true=result)) 6524 6525 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6526 6527 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6528 self._match_text_seq("KEY") 6529 key = self._parse_column() 6530 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6531 self._match_text_seq("VALUE") 6532 value = self._parse_bitwise() 6533 6534 if not key and not value: 6535 return None 6536 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6537 6538 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6539 if not this or not self._match_text_seq("FORMAT", "JSON"): 6540 return this 6541 6542 return self.expression(exp.FormatJson, this=this) 6543 6544 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6545 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6546 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6547 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6548 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6549 else: 6550 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6551 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6552 6553 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6554 6555 if not empty and not error and not null: 6556 return None 6557 6558 return self.expression( 6559 exp.OnCondition, 6560 empty=empty, 6561 error=error, 6562 null=null, 6563 ) 6564 6565 def _parse_on_handling( 6566 self, on: str, *values: str 6567 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6568 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6569 for value in values: 6570 if self._match_text_seq(value, "ON", on): 6571 return f"{value} ON {on}" 6572 6573 index = self._index 6574 if self._match(TokenType.DEFAULT): 6575 default_value = self._parse_bitwise() 6576 if self._match_text_seq("ON", on): 6577 return default_value 6578 6579 self._retreat(index) 6580 6581 return None 6582 6583 @t.overload 6584 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6585 6586 @t.overload 6587 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
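
    # NOTE (illustrative sketch, not original source commentary): for MySQL's
    # `MATCH (title, body) AGAINST ('query' IN BOOLEAN MODE)`, this method is entered
    # with `MATCH (` already consumed, so it only parses the column list, the
    # `) AGAINST (` sequence and the optional mode modifier.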

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
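        # (illustrative note, not original source commentary: _parse_named_window calls
        # this method with alias=True to handle e.g. `... FROM t WINDOW w AS (PARTITION BY y)`)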
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
exp.AlterColumn, 7265 this=column, 7266 allow_null=False, 7267 ) 7268 7269 if self._match_text_seq("SET", "VISIBLE"): 7270 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7271 if self._match_text_seq("SET", "INVISIBLE"): 7272 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7273 7274 self._match_text_seq("SET", "DATA") 7275 self._match_text_seq("TYPE") 7276 return self.expression( 7277 exp.AlterColumn, 7278 this=column, 7279 dtype=self._parse_types(), 7280 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7281 using=self._match(TokenType.USING) and self._parse_assignment(), 7282 ) 7283 7284 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7285 if self._match_texts(("ALL", "EVEN", "AUTO")): 7286 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7287 7288 self._match_text_seq("KEY", "DISTKEY") 7289 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7290 7291 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7292 if compound: 7293 self._match_text_seq("SORTKEY") 7294 7295 if self._match(TokenType.L_PAREN, advance=False): 7296 return self.expression( 7297 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7298 ) 7299 7300 self._match_texts(("AUTO", "NONE")) 7301 return self.expression( 7302 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7303 ) 7304 7305 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7306 index = self._index - 1 7307 7308 partition_exists = self._parse_exists() 7309 if self._match(TokenType.PARTITION, advance=False): 7310 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7311 7312 self._retreat(index) 7313 return self._parse_csv(self._parse_drop_column) 7314 7315 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7316 if self._match(TokenType.COLUMN): 7317 exists = self._parse_exists() 7318 old_column = self._parse_column() 7319 to = self._match_text_seq("TO") 7320 new_column = self._parse_column() 7321 7322 if old_column is None or to is None or new_column is None: 7323 return None 7324 7325 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7326 7327 self._match_text_seq("TO") 7328 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7329 7330 def _parse_alter_table_set(self) -> exp.AlterSet: 7331 alter_set = self.expression(exp.AlterSet) 7332 7333 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7334 "TABLE", "PROPERTIES" 7335 ): 7336 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7337 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7338 alter_set.set("expressions", [self._parse_assignment()]) 7339 elif self._match_texts(("LOGGED", "UNLOGGED")): 7340 alter_set.set("option", exp.var(self._prev.text.upper())) 7341 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7342 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7343 elif self._match_text_seq("LOCATION"): 7344 alter_set.set("location", self._parse_field()) 7345 elif self._match_text_seq("ACCESS", "METHOD"): 7346 alter_set.set("access_method", self._parse_field()) 7347 elif self._match_text_seq("TABLESPACE"): 7348 alter_set.set("tablespace", self._parse_field()) 7349 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7350 
alter_set.set("file_format", [self._parse_field()]) 7351 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7352 alter_set.set("file_format", self._parse_wrapped_options()) 7353 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7354 alter_set.set("copy_options", self._parse_wrapped_options()) 7355 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7356 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7357 else: 7358 if self._match_text_seq("SERDE"): 7359 alter_set.set("serde", self._parse_field()) 7360 7361 alter_set.set("expressions", [self._parse_properties()]) 7362 7363 return alter_set 7364 7365 def _parse_alter(self) -> exp.Alter | exp.Command: 7366 start = self._prev 7367 7368 alter_token = self._match_set(self.ALTERABLES) and self._prev 7369 if not alter_token: 7370 return self._parse_as_command(start) 7371 7372 exists = self._parse_exists() 7373 only = self._match_text_seq("ONLY") 7374 this = self._parse_table(schema=True) 7375 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7376 7377 if self._next: 7378 self._advance() 7379 7380 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7381 if parser: 7382 actions = ensure_list(parser(self)) 7383 not_valid = self._match_text_seq("NOT", "VALID") 7384 options = self._parse_csv(self._parse_property) 7385 7386 if not self._curr and actions: 7387 return self.expression( 7388 exp.Alter, 7389 this=this, 7390 kind=alter_token.text.upper(), 7391 exists=exists, 7392 actions=actions, 7393 only=only, 7394 options=options, 7395 cluster=cluster, 7396 not_valid=not_valid, 7397 ) 7398 7399 return self._parse_as_command(start) 7400 7401 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7402 start = self._prev 7403 # https://duckdb.org/docs/sql/statements/analyze 7404 if not self._curr: 7405 return self.expression(exp.Analyze) 7406 7407 options = [] 7408 while self._match_texts(self.ANALYZE_STYLES): 7409 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7410 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7411 else: 7412 options.append(self._prev.text.upper()) 7413 7414 this: t.Optional[exp.Expression] = None 7415 inner_expression: t.Optional[exp.Expression] = None 7416 7417 kind = self._curr and self._curr.text.upper() 7418 7419 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7420 this = self._parse_table_parts() 7421 elif self._match_text_seq("TABLES"): 7422 if self._match_set((TokenType.FROM, TokenType.IN)): 7423 kind = f"{kind} {self._prev.text.upper()}" 7424 this = self._parse_table(schema=True, is_db_reference=True) 7425 elif self._match_text_seq("DATABASE"): 7426 this = self._parse_table(schema=True, is_db_reference=True) 7427 elif self._match_text_seq("CLUSTER"): 7428 this = self._parse_table() 7429 # Try matching inner expr keywords before fallback to parse table. 
7430 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7431 kind = None 7432 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7433 else: 7434 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7435 kind = None 7436 this = self._parse_table_parts() 7437 7438 partition = self._try_parse(self._parse_partition) 7439 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7440 return self._parse_as_command(start) 7441 7442 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7443 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7444 "WITH", "ASYNC", "MODE" 7445 ): 7446 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7447 else: 7448 mode = None 7449 7450 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7451 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7452 7453 properties = self._parse_properties() 7454 return self.expression( 7455 exp.Analyze, 7456 kind=kind, 7457 this=this, 7458 mode=mode, 7459 partition=partition, 7460 properties=properties, 7461 expression=inner_expression, 7462 options=options, 7463 ) 7464 7465 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7466 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7467 this = None 7468 kind = self._prev.text.upper() 7469 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7470 expressions = [] 7471 7472 if not self._match_text_seq("STATISTICS"): 7473 self.raise_error("Expecting token STATISTICS") 7474 7475 if self._match_text_seq("NOSCAN"): 7476 this = "NOSCAN" 7477 elif self._match(TokenType.FOR): 7478 if self._match_text_seq("ALL", "COLUMNS"): 7479 this = "FOR ALL COLUMNS" 7480 if self._match_texts("COLUMNS"): 7481 this = "FOR COLUMNS" 7482 expressions = self._parse_csv(self._parse_column_reference) 7483 elif self._match_text_seq("SAMPLE"): 7484 sample = self._parse_number() 7485 expressions = [ 7486 self.expression( 7487 exp.AnalyzeSample, 7488 sample=sample, 7489 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7490 ) 7491 ] 7492 7493 return self.expression( 7494 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7495 ) 7496 7497 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7498 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7499 kind = None 7500 this = None 7501 expression: t.Optional[exp.Expression] = None 7502 if self._match_text_seq("REF", "UPDATE"): 7503 kind = "REF" 7504 this = "UPDATE" 7505 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7506 this = "UPDATE SET DANGLING TO NULL" 7507 elif self._match_text_seq("STRUCTURE"): 7508 kind = "STRUCTURE" 7509 if self._match_text_seq("CASCADE", "FAST"): 7510 this = "CASCADE FAST" 7511 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7512 ("ONLINE", "OFFLINE") 7513 ): 7514 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7515 expression = self._parse_into() 7516 7517 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7518 7519 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7520 this = self._prev.text.upper() 7521 if self._match_text_seq("COLUMNS"): 7522 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7523 return None 7524 7525 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7526 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7527 if self._match_text_seq("STATISTICS"): 7528 return self.expression(exp.AnalyzeDelete, kind=kind) 7529 return None 7530 7531 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7532 if self._match_text_seq("CHAINED", "ROWS"): 7533 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7534 return None 7535 7536 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7537 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7538 this = self._prev.text.upper() 7539 expression: t.Optional[exp.Expression] = None 7540 expressions = [] 7541 update_options = None 7542 7543 if self._match_text_seq("HISTOGRAM", "ON"): 7544 expressions = self._parse_csv(self._parse_column_reference) 7545 with_expressions = [] 7546 while self._match(TokenType.WITH): 7547 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7548 if self._match_texts(("SYNC", "ASYNC")): 7549 if self._match_text_seq("MODE", advance=False): 7550 with_expressions.append(f"{self._prev.text.upper()} MODE") 7551 self._advance() 7552 else: 7553 buckets = self._parse_number() 7554 if self._match_text_seq("BUCKETS"): 7555 with_expressions.append(f"{buckets} BUCKETS") 7556 if with_expressions: 7557 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7558 7559 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7560 TokenType.UPDATE, advance=False 7561 ): 7562 update_options = self._prev.text.upper() 7563 self._advance() 7564 elif self._match_text_seq("USING", "DATA"): 7565 expression = self.expression(exp.UsingData, this=self._parse_string()) 7566 7567 return self.expression( 7568 exp.AnalyzeHistogram, 7569 this=this, 7570 expressions=expressions, 7571 expression=expression, 7572 update_options=update_options, 7573 ) 7574 7575 def _parse_merge(self) -> exp.Merge: 7576 self._match(TokenType.INTO) 7577 target = self._parse_table() 7578 7579 if target and self._match(TokenType.ALIAS, advance=False): 7580 target.set("alias", self._parse_table_alias()) 7581 7582 self._match(TokenType.USING) 7583 using = self._parse_table() 7584 7585 self._match(TokenType.ON) 7586 on = self._parse_assignment() 7587 7588 return self.expression( 7589 exp.Merge, 7590 this=target, 7591 using=using, 7592 on=on, 7593 whens=self._parse_when_matched(), 7594 returning=self._parse_returning(), 7595 ) 7596 7597 def _parse_when_matched(self) -> exp.Whens: 7598 whens = [] 7599 7600 while self._match(TokenType.WHEN): 7601 matched = not self._match(TokenType.NOT) 7602 self._match_text_seq("MATCHED") 7603 source = ( 7604 False 7605 if self._match_text_seq("BY", "TARGET") 7606 else self._match_text_seq("BY", "SOURCE") 7607 ) 7608 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7609 7610 self._match(TokenType.THEN) 7611 7612 if self._match(TokenType.INSERT): 7613 this = self._parse_star() 7614 if this: 7615 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7616 else: 7617 then = self.expression( 7618 exp.Insert, 7619 this=exp.var("ROW") 7620 if self._match_text_seq("ROW") 7621 else self._parse_value(values=False), 7622 expression=self._match_text_seq("VALUES") and self._parse_value(), 7623 ) 7624 elif self._match(TokenType.UPDATE): 7625 expressions = self._parse_star() 7626 if expressions: 7627 then = self.expression(exp.Update, expressions=expressions) 7628 else: 7629 then = self.expression( 7630 exp.Update, 7631 
expressions=self._match(TokenType.SET) 7632 and self._parse_csv(self._parse_equality), 7633 ) 7634 elif self._match(TokenType.DELETE): 7635 then = self.expression(exp.Var, this=self._prev.text) 7636 else: 7637 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7638 7639 whens.append( 7640 self.expression( 7641 exp.When, 7642 matched=matched, 7643 source=source, 7644 condition=condition, 7645 then=then, 7646 ) 7647 ) 7648 return self.expression(exp.Whens, expressions=whens) 7649 7650 def _parse_show(self) -> t.Optional[exp.Expression]: 7651 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7652 if parser: 7653 return parser(self) 7654 return self._parse_as_command(self._prev) 7655 7656 def _parse_set_item_assignment( 7657 self, kind: t.Optional[str] = None 7658 ) -> t.Optional[exp.Expression]: 7659 index = self._index 7660 7661 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7662 return self._parse_set_transaction(global_=kind == "GLOBAL") 7663 7664 left = self._parse_primary() or self._parse_column() 7665 assignment_delimiter = self._match_texts(("=", "TO")) 7666 7667 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7668 self._retreat(index) 7669 return None 7670 7671 right = self._parse_statement() or self._parse_id_var() 7672 if isinstance(right, (exp.Column, exp.Identifier)): 7673 right = exp.var(right.name) 7674 7675 this = self.expression(exp.EQ, this=left, expression=right) 7676 return self.expression(exp.SetItem, this=this, kind=kind) 7677 7678 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7679 self._match_text_seq("TRANSACTION") 7680 characteristics = self._parse_csv( 7681 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7682 ) 7683 return self.expression( 7684 exp.SetItem, 7685 expressions=characteristics, 7686 kind="TRANSACTION", 7687 **{"global": global_}, # type: ignore 7688 ) 7689 7690 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7691 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7692 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7693 7694 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7695 index = self._index 7696 set_ = self.expression( 7697 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7698 ) 7699 7700 if self._curr: 7701 self._retreat(index) 7702 return self._parse_as_command(self._prev) 7703 7704 return set_ 7705 7706 def _parse_var_from_options( 7707 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7708 ) -> t.Optional[exp.Var]: 7709 start = self._curr 7710 if not start: 7711 return None 7712 7713 option = start.text.upper() 7714 continuations = options.get(option) 7715 7716 index = self._index 7717 self._advance() 7718 for keywords in continuations or []: 7719 if isinstance(keywords, str): 7720 keywords = (keywords,) 7721 7722 if self._match_text_seq(*keywords): 7723 option = f"{option} {' '.join(keywords)}" 7724 break 7725 else: 7726 if continuations or continuations is None: 7727 if raise_unmatched: 7728 self.raise_error(f"Unknown option {option}") 7729 7730 self._retreat(index) 7731 return None 7732 7733 return exp.var(option) 7734 7735 def _parse_as_command(self, start: Token) -> exp.Command: 7736 while self._curr: 7737 self._advance() 7738 text = self._find_sql(start, self._prev) 7739 size = len(start.text) 7740 self._warn_unsupported() 7741 return exp.Command(this=text[:size], 
expression=text[size:]) 7742 7743 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7744 settings = [] 7745 7746 self._match_l_paren() 7747 kind = self._parse_id_var() 7748 7749 if self._match(TokenType.L_PAREN): 7750 while True: 7751 key = self._parse_id_var() 7752 value = self._parse_primary() 7753 if not key and value is None: 7754 break 7755 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7756 self._match(TokenType.R_PAREN) 7757 7758 self._match_r_paren() 7759 7760 return self.expression( 7761 exp.DictProperty, 7762 this=this, 7763 kind=kind.this if kind else None, 7764 settings=settings, 7765 ) 7766 7767 def _parse_dict_range(self, this: str) -> exp.DictRange: 7768 self._match_l_paren() 7769 has_min = self._match_text_seq("MIN") 7770 if has_min: 7771 min = self._parse_var() or self._parse_primary() 7772 self._match_text_seq("MAX") 7773 max = self._parse_var() or self._parse_primary() 7774 else: 7775 max = self._parse_var() or self._parse_primary() 7776 min = exp.Literal.number(0) 7777 self._match_r_paren() 7778 return self.expression(exp.DictRange, this=this, min=min, max=max) 7779 7780 def _parse_comprehension( 7781 self, this: t.Optional[exp.Expression] 7782 ) -> t.Optional[exp.Comprehension]: 7783 index = self._index 7784 expression = self._parse_column() 7785 if not self._match(TokenType.IN): 7786 self._retreat(index - 1) 7787 return None 7788 iterator = self._parse_column() 7789 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7790 return self.expression( 7791 exp.Comprehension, 7792 this=this, 7793 expression=expression, 7794 iterator=iterator, 7795 condition=condition, 7796 ) 7797 7798 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7799 if self._match(TokenType.HEREDOC_STRING): 7800 return self.expression(exp.Heredoc, this=self._prev.text) 7801 7802 if not self._match_text_seq("$"): 7803 return None 7804 7805 tags = ["$"] 7806 tag_text = None 7807 7808 if self._is_connected(): 7809 self._advance() 7810 tags.append(self._prev.text.upper()) 7811 else: 7812 self.raise_error("No closing $ found") 7813 7814 if tags[-1] != "$": 7815 if self._is_connected() and self._match_text_seq("$"): 7816 tag_text = tags[-1] 7817 tags.append("$") 7818 else: 7819 self.raise_error("No closing $ found") 7820 7821 heredoc_start = self._curr 7822 7823 while self._curr: 7824 if self._match_text_seq(*tags, advance=False): 7825 this = self._find_sql(heredoc_start, self._prev) 7826 self._advance(len(tags)) 7827 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7828 7829 self._advance() 7830 7831 self.raise_error(f"No closing {''.join(tags)} found") 7832 return None 7833 7834 def _find_parser( 7835 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7836 ) -> t.Optional[t.Callable]: 7837 if not self._curr: 7838 return None 7839 7840 index = self._index 7841 this = [] 7842 while True: 7843 # The current token might be multiple words 7844 curr = self._curr.text.upper() 7845 key = curr.split(" ") 7846 this.append(curr) 7847 7848 self._advance() 7849 result, trie = in_trie(trie, key) 7850 if result == TrieResult.FAILED: 7851 break 7852 7853 if result == TrieResult.EXISTS: 7854 subparser = parsers[" ".join(this)] 7855 return subparser 7856 7857 self._retreat(index) 7858 return None 7859 7860 def _match(self, token_type, advance=True, expression=None): 7861 if not self._curr: 7862 return None 7863 7864 if self._curr.token_type == token_type: 7865 if advance: 7866 self._advance() 7867 self._add_comments(expression) 7868 return 
True 7869 7870 return None 7871 7872 def _match_set(self, types, advance=True): 7873 if not self._curr: 7874 return None 7875 7876 if self._curr.token_type in types: 7877 if advance: 7878 self._advance() 7879 return True 7880 7881 return None 7882 7883 def _match_pair(self, token_type_a, token_type_b, advance=True): 7884 if not self._curr or not self._next: 7885 return None 7886 7887 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7888 if advance: 7889 self._advance(2) 7890 return True 7891 7892 return None 7893 7894 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7895 if not self._match(TokenType.L_PAREN, expression=expression): 7896 self.raise_error("Expecting (") 7897 7898 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7899 if not self._match(TokenType.R_PAREN, expression=expression): 7900 self.raise_error("Expecting )") 7901 7902 def _match_texts(self, texts, advance=True): 7903 if ( 7904 self._curr 7905 and self._curr.token_type != TokenType.STRING 7906 and self._curr.text.upper() in texts 7907 ): 7908 if advance: 7909 self._advance() 7910 return True 7911 return None 7912 7913 def _match_text_seq(self, *texts, advance=True): 7914 index = self._index 7915 for text in texts: 7916 if ( 7917 self._curr 7918 and self._curr.token_type != TokenType.STRING 7919 and self._curr.text.upper() == text 7920 ): 7921 self._advance() 7922 else: 7923 self._retreat(index) 7924 return None 7925 7926 if not advance: 7927 self._retreat(index) 7928 7929 return True 7930 7931 def _replace_lambda( 7932 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7933 ) -> t.Optional[exp.Expression]: 7934 if not node: 7935 return node 7936 7937 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7938 7939 for column in node.find_all(exp.Column): 7940 typ = lambda_types.get(column.parts[0].name) 7941 if typ is not None: 7942 dot_or_id = column.to_dot() if column.table else column.this 7943 7944 if typ: 7945 dot_or_id = self.expression( 7946 exp.Cast, 7947 this=dot_or_id, 7948 to=typ, 7949 ) 7950 7951 parent = column.parent 7952 7953 while isinstance(parent, exp.Dot): 7954 if not isinstance(parent.parent, exp.Dot): 7955 parent.replace(dot_or_id) 7956 break 7957 parent = parent.parent 7958 else: 7959 if column is node: 7960 node = dot_or_id 7961 else: 7962 column.replace(dot_or_id) 7963 return node 7964 7965 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7966 start = self._prev 7967 7968 # Not to be confused with TRUNCATE(number, decimals) function call 7969 if self._match(TokenType.L_PAREN): 7970 self._retreat(self._index - 2) 7971 return self._parse_function() 7972 7973 # Clickhouse supports TRUNCATE DATABASE as well 7974 is_database = self._match(TokenType.DATABASE) 7975 7976 self._match(TokenType.TABLE) 7977 7978 exists = self._parse_exists(not_=False) 7979 7980 expressions = self._parse_csv( 7981 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7982 ) 7983 7984 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7985 7986 if self._match_text_seq("RESTART", "IDENTITY"): 7987 identity = "RESTART" 7988 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7989 identity = "CONTINUE" 7990 else: 7991 identity = None 7992 7993 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7994 option = self._prev.text 7995 else: 7996 option = None 7997 7998 partition = self._parse_partition() 
7999 8000 # Fallback case 8001 if self._curr: 8002 return self._parse_as_command(start) 8003 8004 return self.expression( 8005 exp.TruncateTable, 8006 expressions=expressions, 8007 is_database=is_database, 8008 exists=exists, 8009 cluster=cluster, 8010 identity=identity, 8011 option=option, 8012 partition=partition, 8013 ) 8014 8015 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8016 this = self._parse_ordered(self._parse_opclass) 8017 8018 if not self._match(TokenType.WITH): 8019 return this 8020 8021 op = self._parse_var(any_token=True) 8022 8023 return self.expression(exp.WithOperator, this=this, op=op) 8024 8025 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8026 self._match(TokenType.EQ) 8027 self._match(TokenType.L_PAREN) 8028 8029 opts: t.List[t.Optional[exp.Expression]] = [] 8030 option: exp.Expression | None 8031 while self._curr and not self._match(TokenType.R_PAREN): 8032 if self._match_text_seq("FORMAT_NAME", "="): 8033 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8034 option = self._parse_format_name() 8035 else: 8036 option = self._parse_property() 8037 8038 if option is None: 8039 self.raise_error("Unable to parse option") 8040 break 8041 8042 opts.append(option) 8043 8044 return opts 8045 8046 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8047 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8048 8049 options = [] 8050 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8051 option = self._parse_var(any_token=True) 8052 prev = self._prev.text.upper() 8053 8054 # Different dialects might separate options and values by white space, "=" and "AS" 8055 self._match(TokenType.EQ) 8056 self._match(TokenType.ALIAS) 8057 8058 param = self.expression(exp.CopyParameter, this=option) 8059 8060 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8061 TokenType.L_PAREN, advance=False 8062 ): 8063 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8064 param.set("expressions", self._parse_wrapped_options()) 8065 elif prev == "FILE_FORMAT": 8066 # T-SQL's external file format case 8067 param.set("expression", self._parse_field()) 8068 else: 8069 param.set("expression", self._parse_unquoted_field()) 8070 8071 options.append(param) 8072 self._match(sep) 8073 8074 return options 8075 8076 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8077 expr = self.expression(exp.Credentials) 8078 8079 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8080 expr.set("storage", self._parse_field()) 8081 if self._match_text_seq("CREDENTIALS"): 8082 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8083 creds = ( 8084 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8085 ) 8086 expr.set("credentials", creds) 8087 if self._match_text_seq("ENCRYPTION"): 8088 expr.set("encryption", self._parse_wrapped_options()) 8089 if self._match_text_seq("IAM_ROLE"): 8090 expr.set("iam_role", self._parse_field()) 8091 if self._match_text_seq("REGION"): 8092 expr.set("region", self._parse_field()) 8093 8094 return expr 8095 8096 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8097 return self._parse_field() 8098 8099 def _parse_copy(self) -> exp.Copy | exp.Command: 8100 start = self._prev 8101 8102 self._match(TokenType.INTO) 8103 8104 this = ( 8105 self._parse_select(nested=True, parse_subquery_alias=False) 8106 if self._match(TokenType.L_PAREN, advance=False) 8107 else self._parse_table(schema=True) 
8108 ) 8109 8110 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8111 8112 files = self._parse_csv(self._parse_file_location) 8113 credentials = self._parse_credentials() 8114 8115 self._match_text_seq("WITH") 8116 8117 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8118 8119 # Fallback case 8120 if self._curr: 8121 return self._parse_as_command(start) 8122 8123 return self.expression( 8124 exp.Copy, 8125 this=this, 8126 kind=kind, 8127 credentials=credentials, 8128 files=files, 8129 params=params, 8130 ) 8131 8132 def _parse_normalize(self) -> exp.Normalize: 8133 return self.expression( 8134 exp.Normalize, 8135 this=self._parse_bitwise(), 8136 form=self._match(TokenType.COMMA) and self._parse_var(), 8137 ) 8138 8139 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8140 args = self._parse_csv(lambda: self._parse_lambda()) 8141 8142 this = seq_get(args, 0) 8143 decimals = seq_get(args, 1) 8144 8145 return expr_type( 8146 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8147 ) 8148 8149 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8150 if self._match_text_seq("COLUMNS", "(", advance=False): 8151 this = self._parse_function() 8152 if isinstance(this, exp.Columns): 8153 this.set("unpack", True) 8154 return this 8155 8156 return self.expression( 8157 exp.Star, 8158 **{ # type: ignore 8159 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8160 "replace": self._parse_star_op("REPLACE"), 8161 "rename": self._parse_star_op("RENAME"), 8162 }, 8163 ) 8164 8165 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8166 privilege_parts = [] 8167 8168 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8169 # (end of privilege list) or L_PAREN (start of column list) are met 8170 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8171 privilege_parts.append(self._curr.text.upper()) 8172 self._advance() 8173 8174 this = exp.var(" ".join(privilege_parts)) 8175 expressions = ( 8176 self._parse_wrapped_csv(self._parse_column) 8177 if self._match(TokenType.L_PAREN, advance=False) 8178 else None 8179 ) 8180 8181 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8182 8183 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8184 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8185 principal = self._parse_id_var() 8186 8187 if not principal: 8188 return None 8189 8190 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8191 8192 def _parse_grant(self) -> exp.Grant | exp.Command: 8193 start = self._prev 8194 8195 privileges = self._parse_csv(self._parse_grant_privilege) 8196 8197 self._match(TokenType.ON) 8198 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8199 8200 # Attempt to parse the securable e.g. 
MySQL allows names 8201 # such as "foo.*", "*.*" which are not easily parseable yet 8202 securable = self._try_parse(self._parse_table_parts) 8203 8204 if not securable or not self._match_text_seq("TO"): 8205 return self._parse_as_command(start) 8206 8207 principals = self._parse_csv(self._parse_grant_principal) 8208 8209 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8210 8211 if self._curr: 8212 return self._parse_as_command(start) 8213 8214 return self.expression( 8215 exp.Grant, 8216 privileges=privileges, 8217 kind=kind, 8218 securable=securable, 8219 principals=principals, 8220 grant_option=grant_option, 8221 ) 8222 8223 def _parse_overlay(self) -> exp.Overlay: 8224 return self.expression( 8225 exp.Overlay, 8226 **{ # type: ignore 8227 "this": self._parse_bitwise(), 8228 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8229 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8230 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8231 }, 8232 ) 8233 8234 def _parse_format_name(self) -> exp.Property: 8235 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8236 # for FILE_FORMAT = <format_name> 8237 return self.expression( 8238 exp.Property, 8239 this=exp.var("FORMAT_NAME"), 8240 value=self._parse_string() or self._parse_table_parts(), 8241 ) 8242 8243 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8244 args: t.List[exp.Expression] = [] 8245 8246 if self._match(TokenType.DISTINCT): 8247 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8248 self._match(TokenType.COMMA) 8249 8250 args.extend(self._parse_csv(self._parse_assignment)) 8251 8252 return self.expression( 8253 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8254 ) 8255 8256 def _identifier_expression( 8257 self, token: t.Optional[Token] = None, **kwargs: t.Any 8258 ) -> exp.Identifier: 8259 token = token or self._prev 8260 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8261 expression.update_positions(token) 8262 return expression
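The _parse_as_command fallback that appears throughout the methods above is what keeps statements sqlglot cannot fully model round-trippable. A small illustration of the observable behavior (a sketch; whether a given statement falls back to exp.Command depends on the dialect's registered parsers):

    import sqlglot
    from sqlglot import exp

    # The default dialect registers no SHOW parsers, so the statement is
    # preserved verbatim as a generic Command node
    stmt = sqlglot.parse_one("SHOW TABLES")
    assert isinstance(stmt, exp.Command)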
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
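A Parser is rarely constructed by hand (the top-level sqlglot.parse helpers do this wiring), but direct construction is straightforward. A minimal sketch:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to five errors and raise them together, rather than
    # failing on the first one (the ErrorLevel.IMMEDIATE default)
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")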
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
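A minimal end-to-end sketch pairing parse with the base Tokenizer from sqlglot.tokens:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)

    # One syntax tree per semicolon-separated statement
    trees = Parser().parse(tokens, sql=sql)
    assert len(trees) == 2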
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
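For example, to accept only statements of a given shape (a sketch; it assumes exp.Select is among the keys registered in EXPRESSION_PARSERS, which holds for the base parser):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]

    # Input that cannot be parsed into any of the requested types raises a
    # ParseError whose errors carry an "into_expression" entry per attempt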
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
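Under ErrorLevel.WARN the recorded errors are logged; under ErrorLevel.RAISE they are merged into a single ParseError. A sketch of invoking it manually after a parse run:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql)
    parser.check_errors()  # logs anything accumulated in parser.errors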
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
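This helper is how the _parse_* methods above build nodes; a hypothetical subclass method illustrating the pattern (the method name and node choice are illustrative, not part of this module):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import TokenType

    class MyParser(Parser):
        def _parse_id_pair(self):
            left = self._parse_id_var()
            self._match(TokenType.COMMA)
            right = self._parse_id_var()
            # expression() attaches any pending comments and validates
            # that the node's mandatory arguments are set
            return self.expression(exp.Tuple, expressions=[left, right])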
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
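A sketch of validation surfacing a missing mandatory argument (exp.EQ requires both this and expression):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()  # ErrorLevel.IMMEDIATE by default
    eq = exp.EQ(this=exp.column("a"))  # "expression" is never set
    parser.validate_expression(eq)  # raises ParseError via raise_error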
def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    start = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(start)
        return None

    token_type = self._prev.token_type

    if token_type == TokenType.UNION:
        operation: t.Type[exp.SetOperation] = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    comments = self._prev.comments

    if self._match(TokenType.DISTINCT):
        distinct: t.Optional[bool] = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not side and not kind:
            kind = "INNER"

    on_column_list = None
    if by_name and self._match_texts(("ON", "BY")):
        on_column_list = self._parse_wrapped_csv(self._parse_column)

    expression = self._parse_select(nested=True, parse_set_operation=False)

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )
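Seen from the top-level API, this method is what produces set-operation trees. A small illustration:

    import sqlglot
    from sqlglot import exp

    tree = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
    assert isinstance(tree, exp.Union)
    assert tree.args["distinct"] is False  # ALL was matched, so not distinct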