sqlglot.parser
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
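
# A hedged usage sketch (not part of the module): build_mod receives the already
# parsed argument list of MOD(...) and parenthesizes binary operands so the infix
# rendering stays unambiguous. This assumes sqlglot's public expression helpers
# and operator overloading behave as in recent releases.
# >>> from sqlglot import exp
# >>> node = build_mod([exp.column("a") + exp.Literal.number(1), exp.Literal.number(7)])
# >>> node.sql()
# '(a + 1) % 7'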


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )
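
# A hedged illustration (not part of the module): LOCATE(substr, string) puts the
# haystack second, so build_locate_strposition swaps the arguments when building
# the canonical StrPosition node.
# >>> from sqlglot import exp
# >>> node = build_locate_strposition([exp.Literal.string("b"), exp.column("s")])
# >>> node.this.sql(), node.args["substr"].sql()
# ('s', "'b'")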


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
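
    # A hedged sketch (not part of the module): FUNCTIONS maps upper-cased function
    # names to builder callables, so dispatch is a plain dict lookup on the parsed name.
    # >>> from sqlglot import exp
    # >>> Parser.FUNCTIONS["COALESCE"]([exp.column("a"), exp.column("b")]).sql()
    # 'COALESCE(a, b)'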

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS
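
    # A hedged illustration (not part of the module): join keywords are valid
    # identifiers in general but are carved out of the table-alias set, so
    # `SELECT * FROM t LEFT JOIN ...` never treats LEFT as an alias for t.
    # >>> TokenType.LEFT in Parser.ID_VAR_TOKENS
    # True
    # >>> TokenType.LEFT in Parser.TABLE_ALIAS_TOKENS
    # False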

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
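
    # A hedged illustration (not part of the module): TERM/FACTOR drive the usual
    # arithmetic precedence climbing, so multiplication binds tighter than addition.
    # >>> from sqlglot import parse_one
    # >>> node = parse_one("1 + 2 * 3")
    # >>> type(node).__name__, type(node.expression).__name__
    # ('Add', 'Mul')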

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
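
    # A hedged illustration (not part of the module): the DCOLON entry is what turns
    # the postfix `::` operator into a Cast (or TryCast when STRICT_CAST is off).
    # >>> from sqlglot import parse_one
    # >>> parse_one("x::INT").sql()
    # 'CAST(x AS INT)'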

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }
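
    # A hedged illustration (not part of the module): STRING_PARSERS and
    # NUMERIC_PARSERS turn literal tokens directly into Literal nodes, flagged
    # by is_string.
    # >>> from sqlglot import parse_one
    # >>> parse_one("'hi'").is_string, parse_one("5").is_string
    # (True, False)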

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
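
    # A hedged illustration (not part of the module): RANGE_PARSERS hook postfix
    # predicates onto an already parsed left-hand operand, e.g. BETWEEN and IN.
    # >>> from sqlglot import parse_one
    # >>> type(parse_one("x BETWEEN 1 AND 10")).__name__
    # 'Between'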
"IMMUTABLE": lambda self: self.expression( 973 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 974 ), 975 "INHERITS": lambda self: self.expression( 976 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 977 ), 978 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 979 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 980 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 981 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 982 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 983 "LIKE": lambda self: self._parse_create_like(), 984 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 985 "LOCK": lambda self: self._parse_locking(), 986 "LOCKING": lambda self: self._parse_locking(), 987 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 988 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 989 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 990 "MODIFIES": lambda self: self._parse_modifies_property(), 991 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 992 "NO": lambda self: self._parse_no_property(), 993 "ON": lambda self: self._parse_on_property(), 994 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 995 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 996 "PARTITION": lambda self: self._parse_partitioned_of(), 997 "PARTITION BY": lambda self: self._parse_partitioned_by(), 998 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 999 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1000 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1001 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1002 "READS": lambda self: self._parse_reads_property(), 1003 "REMOTE": lambda self: self._parse_remote_with_connection(), 1004 "RETURNS": lambda self: self._parse_returns(), 1005 "STRICT": lambda self: self.expression(exp.StrictProperty), 1006 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1007 "ROW": lambda self: self._parse_row(), 1008 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1009 "SAMPLE": lambda self: self.expression( 1010 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1011 ), 1012 "SECURE": lambda self: self.expression(exp.SecureProperty), 1013 "SECURITY": lambda self: self._parse_security(), 1014 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1015 "SETTINGS": lambda self: self._parse_settings_property(), 1016 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1017 "SORTKEY": lambda self: self._parse_sortkey(), 1018 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1019 "STABLE": lambda self: self.expression( 1020 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1021 ), 1022 "STORED": lambda self: self._parse_stored(), 1023 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1024 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1025 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1026 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1027 "TO": lambda self: self._parse_to_table(), 1028 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1029 "TRANSFORM": lambda self: self.expression( 1030 

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
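
    # A hedged illustration (not part of the module): CONSTRAINT_PARSERS is keyed by
    # the leading keyword(s) of a column constraint inside a schema definition.
    # >>> from sqlglot import parse_one
    # >>> parse_one("CREATE TABLE t (x INT NOT NULL)").sql()
    # 'CREATE TABLE t (x INT NOT NULL)'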

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized into the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
1190 "DECODE": lambda self: self._parse_decode(), 1191 "EXTRACT": lambda self: self._parse_extract(), 1192 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1193 "GAP_FILL": lambda self: self._parse_gap_fill(), 1194 "JSON_OBJECT": lambda self: self._parse_json_object(), 1195 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1196 "JSON_TABLE": lambda self: self._parse_json_table(), 1197 "MATCH": lambda self: self._parse_match_against(), 1198 "NORMALIZE": lambda self: self._parse_normalize(), 1199 "OPENJSON": lambda self: self._parse_open_json(), 1200 "OVERLAY": lambda self: self._parse_overlay(), 1201 "POSITION": lambda self: self._parse_position(), 1202 "PREDICT": lambda self: self._parse_predict(), 1203 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1204 "STRING_AGG": lambda self: self._parse_string_agg(), 1205 "SUBSTRING": lambda self: self._parse_substring(), 1206 "TRIM": lambda self: self._parse_trim(), 1207 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1208 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1209 "XMLELEMENT": lambda self: self.expression( 1210 exp.XMLElement, 1211 this=self._match_text_seq("NAME") and self._parse_id_var(), 1212 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1213 ), 1214 "XMLTABLE": lambda self: self._parse_xml_table(), 1215 } 1216 1217 QUERY_MODIFIER_PARSERS = { 1218 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1219 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1220 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1221 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1222 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1223 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1224 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1225 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1226 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1227 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1228 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1229 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1230 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1231 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1232 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1233 TokenType.CLUSTER_BY: lambda self: ( 1234 "cluster", 1235 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1236 ), 1237 TokenType.DISTRIBUTE_BY: lambda self: ( 1238 "distribute", 1239 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1240 ), 1241 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1242 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1243 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1244 } 1245 1246 SET_PARSERS = { 1247 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1248 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1249 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1250 "TRANSACTION": lambda self: self._parse_set_transaction(), 1251 } 1252 1253 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1254 1255 TYPE_LITERAL_PARSERS = { 1256 exp.DataType.Type.JSON: lambda self, this, _: 

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
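
    # A hedged sketch (not part of the module): dialect parsers customize behavior by
    # overriding these class-level flags. A hypothetical lenient subclass might look like:
    #
    # class LenientParser(Parser):
    #     STRICT_CAST = False  # `::` would then build TryCast instead of Cast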

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
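
    # A hedged usage sketch (not part of the module): the parser is normally obtained
    # from a Dialect, which supplies matching tokenizer output. This assumes the
    # Dialect.parser()/Dialect.tokenize() helpers behave as in recent sqlglot releases.
    # >>> from sqlglot.dialects.dialect import Dialect
    # >>> dialect = Dialect.get_or_raise("duckdb")
    # >>> parser = dialect.parser()
    # >>> [e.sql() for e in parser.parse(dialect.tokenize("SELECT 1"), "SELECT 1")]
    # ['SELECT 1']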
1563 """ 1564 errors = [] 1565 for expression_type in ensure_list(expression_types): 1566 parser = self.EXPRESSION_PARSERS.get(expression_type) 1567 if not parser: 1568 raise TypeError(f"No parser registered for {expression_type}") 1569 1570 try: 1571 return self._parse(parser, raw_tokens, sql) 1572 except ParseError as e: 1573 e.errors[0]["into_expression"] = expression_type 1574 errors.append(e) 1575 1576 raise ParseError( 1577 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1578 errors=merge_errors(errors), 1579 ) from errors[-1] 1580 1581 def _parse( 1582 self, 1583 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1584 raw_tokens: t.List[Token], 1585 sql: t.Optional[str] = None, 1586 ) -> t.List[t.Optional[exp.Expression]]: 1587 self.reset() 1588 self.sql = sql or "" 1589 1590 total = len(raw_tokens) 1591 chunks: t.List[t.List[Token]] = [[]] 1592 1593 for i, token in enumerate(raw_tokens): 1594 if token.token_type == TokenType.SEMICOLON: 1595 if token.comments: 1596 chunks.append([token]) 1597 1598 if i < total - 1: 1599 chunks.append([]) 1600 else: 1601 chunks[-1].append(token) 1602 1603 expressions = [] 1604 1605 for tokens in chunks: 1606 self._index = -1 1607 self._tokens = tokens 1608 self._advance() 1609 1610 expressions.append(parse_method(self)) 1611 1612 if self._index < len(self._tokens): 1613 self.raise_error("Invalid expression / Unexpected token") 1614 1615 self.check_errors() 1616 1617 return expressions 1618 1619 def check_errors(self) -> None: 1620 """Logs or raises any found errors, depending on the chosen error level setting.""" 1621 if self.error_level == ErrorLevel.WARN: 1622 for error in self.errors: 1623 logger.error(str(error)) 1624 elif self.error_level == ErrorLevel.RAISE and self.errors: 1625 raise ParseError( 1626 concat_messages(self.errors, self.max_errors), 1627 errors=merge_errors(self.errors), 1628 ) 1629 1630 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1631 """ 1632 Appends an error in the list of recorded errors or raises it, depending on the chosen 1633 error level setting. 1634 """ 1635 token = token or self._curr or self._prev or Token.string("") 1636 start = token.start 1637 end = token.end + 1 1638 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1639 highlight = self.sql[start:end] 1640 end_context = self.sql[end : end + self.error_message_context] 1641 1642 error = ParseError.new( 1643 f"{message}. Line {token.line}, Col: {token.col}.\n" 1644 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1645 description=message, 1646 line=token.line, 1647 col=token.col, 1648 start_context=start_context, 1649 highlight=highlight, 1650 end_context=end_context, 1651 ) 1652 1653 if self.error_level == ErrorLevel.IMMEDIATE: 1654 raise error 1655 1656 self.errors.append(error) 1657 1658 def expression( 1659 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1660 ) -> E: 1661 """ 1662 Creates a new, validated Expression. 1663 1664 Args: 1665 exp_class: The expression class to instantiate. 1666 comments: An optional list of comments to attach to the expression. 1667 kwargs: The arguments to set for the expression along with their respective values. 1668 1669 Returns: 1670 The target expression. 
1671 """ 1672 instance = exp_class(**kwargs) 1673 instance.add_comments(comments) if comments else self._add_comments(instance) 1674 return self.validate_expression(instance) 1675 1676 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1677 if expression and self._prev_comments: 1678 expression.add_comments(self._prev_comments) 1679 self._prev_comments = None 1680 1681 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1682 """ 1683 Validates an Expression, making sure that all its mandatory arguments are set. 1684 1685 Args: 1686 expression: The expression to validate. 1687 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1688 1689 Returns: 1690 The validated expression. 1691 """ 1692 if self.error_level != ErrorLevel.IGNORE: 1693 for error_message in expression.error_messages(args): 1694 self.raise_error(error_message) 1695 1696 return expression 1697 1698 def _find_sql(self, start: Token, end: Token) -> str: 1699 return self.sql[start.start : end.end + 1] 1700 1701 def _is_connected(self) -> bool: 1702 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1703 1704 def _advance(self, times: int = 1) -> None: 1705 self._index += times 1706 self._curr = seq_get(self._tokens, self._index) 1707 self._next = seq_get(self._tokens, self._index + 1) 1708 1709 if self._index > 0: 1710 self._prev = self._tokens[self._index - 1] 1711 self._prev_comments = self._prev.comments 1712 else: 1713 self._prev = None 1714 self._prev_comments = None 1715 1716 def _retreat(self, index: int) -> None: 1717 if index != self._index: 1718 self._advance(index - self._index) 1719 1720 def _warn_unsupported(self) -> None: 1721 if len(self._tokens) <= 1: 1722 return 1723 1724 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1725 # interested in emitting a warning for the one being currently processed. 1726 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1727 1728 logger.warning( 1729 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1730 ) 1731 1732 def _parse_command(self) -> exp.Command: 1733 self._warn_unsupported() 1734 return self.expression( 1735 exp.Command, 1736 comments=self._prev_comments, 1737 this=self._prev.text.upper(), 1738 expression=self._parse_string(), 1739 ) 1740 1741 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1742 """ 1743 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1744 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1745 solve this by setting & resetting the parser state accordingly.
1746 """
1747 index = self._index
1748 error_level = self.error_level
1749
1750 self.error_level = ErrorLevel.IMMEDIATE
1751 try:
1752 this = parse_method()
1753 except ParseError:
1754 this = None
1755 finally:
1756 if not this or retreat:
1757 self._retreat(index)
1758 self.error_level = error_level
1759
1760 return this
1761
1762 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1763 start = self._prev
1764 exists = self._parse_exists() if allow_exists else None
1765
1766 self._match(TokenType.ON)
1767
1768 materialized = self._match_text_seq("MATERIALIZED")
1769 kind = self._match_set(self.CREATABLES) and self._prev
1770 if not kind:
1771 return self._parse_as_command(start)
1772
1773 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1774 this = self._parse_user_defined_function(kind=kind.token_type)
1775 elif kind.token_type == TokenType.TABLE:
1776 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1777 elif kind.token_type == TokenType.COLUMN:
1778 this = self._parse_column()
1779 else:
1780 this = self._parse_id_var()
1781
1782 self._match(TokenType.IS)
1783
1784 return self.expression(
1785 exp.Comment,
1786 this=this,
1787 kind=kind.text,
1788 expression=self._parse_string(),
1789 exists=exists,
1790 materialized=materialized,
1791 )
1792
1793 def _parse_to_table(
1794 self,
1795 ) -> exp.ToTableProperty:
1796 table = self._parse_table_parts(schema=True)
1797 return self.expression(exp.ToTableProperty, this=table)
1798
1799 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1800 def _parse_ttl(self) -> exp.Expression:
1801 def _parse_ttl_action() -> t.Optional[exp.Expression]:
1802 this = self._parse_bitwise()
1803
1804 if self._match_text_seq("DELETE"):
1805 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1806 if self._match_text_seq("RECOMPRESS"):
1807 return self.expression(
1808 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1809 )
1810 if self._match_text_seq("TO", "DISK"):
1811 return self.expression(
1812 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1813 )
1814 if self._match_text_seq("TO", "VOLUME"):
1815 return self.expression(
1816 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1817 )
1818
1819 return this
1820
1821 expressions = self._parse_csv(_parse_ttl_action)
1822 where = self._parse_where()
1823 group = self._parse_group()
1824
1825 aggregates = None
1826 if group and self._match(TokenType.SET):
1827 aggregates = self._parse_csv(self._parse_set_item)
1828
1829 return self.expression(
1830 exp.MergeTreeTTL,
1831 expressions=expressions,
1832 where=where,
1833 group=group,
1834 aggregates=aggregates,
1835 )
1836
1837 def _parse_statement(self) -> t.Optional[exp.Expression]:
1838 if self._curr is None:
1839 return None
1840
1841 if self._match_set(self.STATEMENT_PARSERS):
1842 comments = self._prev_comments
1843 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1844 stmt.add_comments(comments, prepend=True)
1845 return stmt
1846
1847 if self._match_set(self.dialect.tokenizer.COMMANDS):
1848 return self._parse_command()
1849
1850 expression = self._parse_expression()
1851 expression = self._parse_set_operations(expression) if expression else self._parse_select()
1852 return self._parse_query_modifiers(expression)
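    # Example (illustrative sketch, mirroring what Dialect.parse does internally
    # with the entry points above; assumes the public Dialect API):
    #
    #     from sqlglot.dialects import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     sql = "SELECT 1; SELECT 2"
    #     tokens = dialect.tokenize(sql)
    #     trees = dialect.parser().parse(tokens, sql=sql)  # one tree per statement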
1853 1854 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1855 start = self._prev 1856 temporary = self._match(TokenType.TEMPORARY) 1857 materialized = self._match_text_seq("MATERIALIZED") 1858 1859 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1860 if not kind: 1861 return self._parse_as_command(start) 1862 1863 concurrently = self._match_text_seq("CONCURRENTLY") 1864 if_exists = exists or self._parse_exists() 1865 1866 if kind == "COLUMN": 1867 this = self._parse_column() 1868 else: 1869 this = self._parse_table_parts( 1870 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1871 ) 1872 1873 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1874 1875 if self._match(TokenType.L_PAREN, advance=False): 1876 expressions = self._parse_wrapped_csv(self._parse_types) 1877 else: 1878 expressions = None 1879 1880 return self.expression( 1881 exp.Drop, 1882 exists=if_exists, 1883 this=this, 1884 expressions=expressions, 1885 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1886 temporary=temporary, 1887 materialized=materialized, 1888 cascade=self._match_text_seq("CASCADE"), 1889 constraints=self._match_text_seq("CONSTRAINTS"), 1890 purge=self._match_text_seq("PURGE"), 1891 cluster=cluster, 1892 concurrently=concurrently, 1893 ) 1894 1895 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1896 return ( 1897 self._match_text_seq("IF") 1898 and (not not_ or self._match(TokenType.NOT)) 1899 and self._match(TokenType.EXISTS) 1900 ) 1901 1902 def _parse_create(self) -> exp.Create | exp.Command: 1903 # Note: this can't be None because we've matched a statement parser 1904 start = self._prev 1905 1906 replace = ( 1907 start.token_type == TokenType.REPLACE 1908 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1909 or self._match_pair(TokenType.OR, TokenType.ALTER) 1910 ) 1911 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1912 1913 unique = self._match(TokenType.UNIQUE) 1914 1915 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1916 clustered = True 1917 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1918 "COLUMNSTORE" 1919 ): 1920 clustered = False 1921 else: 1922 clustered = None 1923 1924 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1925 self._advance() 1926 1927 properties = None 1928 create_token = self._match_set(self.CREATABLES) and self._prev 1929 1930 if not create_token: 1931 # exp.Properties.Location.POST_CREATE 1932 properties = self._parse_properties() 1933 create_token = self._match_set(self.CREATABLES) and self._prev 1934 1935 if not properties or not create_token: 1936 return self._parse_as_command(start) 1937 1938 concurrently = self._match_text_seq("CONCURRENTLY") 1939 exists = self._parse_exists(not_=True) 1940 this = None 1941 expression: t.Optional[exp.Expression] = None 1942 indexes = None 1943 no_schema_binding = None 1944 begin = None 1945 end = None 1946 clone = None 1947 1948 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1949 nonlocal properties 1950 if properties and temp_props: 1951 properties.expressions.extend(temp_props.expressions) 1952 elif temp_props: 1953 properties = temp_props 1954 1955 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1956 this = self._parse_user_defined_function(kind=create_token.token_type) 1957 1958 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1959 
extend_props(self._parse_properties())
1960
1961 expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
1962 extend_props(self._parse_properties())
1963
1964 if not expression:
1965 if self._match(TokenType.COMMAND):
1966 expression = self._parse_as_command(self._prev)
1967 else:
1968 begin = self._match(TokenType.BEGIN)
1969 return_ = self._match_text_seq("RETURN")
1970
1971 if self._match(TokenType.STRING, advance=False):
1972 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
1973 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
1974 expression = self._parse_string()
1975 extend_props(self._parse_properties())
1976 else:
1977 expression = self._parse_user_defined_function_expression()
1978
1979 end = self._match_text_seq("END")
1980
1981 if return_:
1982 expression = self.expression(exp.Return, this=expression)
1983 elif create_token.token_type == TokenType.INDEX:
1984 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
1985 if not self._match(TokenType.ON):
1986 index = self._parse_id_var()
1987 anonymous = False
1988 else:
1989 index = None
1990 anonymous = True
1991
1992 this = self._parse_index(index=index, anonymous=anonymous)
1993 elif create_token.token_type in self.DB_CREATABLES:
1994 table_parts = self._parse_table_parts(
1995 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
1996 )
1997
1998 # exp.Properties.Location.POST_NAME
1999 self._match(TokenType.COMMA)
2000 extend_props(self._parse_properties(before=True))
2001
2002 this = self._parse_schema(this=table_parts)
2003
2004 # exp.Properties.Location.POST_SCHEMA and POST_WITH
2005 extend_props(self._parse_properties())
2006
2007 has_alias = self._match(TokenType.ALIAS)
2008 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
2009 # exp.Properties.Location.POST_ALIAS
2010 extend_props(self._parse_properties())
2011
2012 if create_token.token_type == TokenType.SEQUENCE:
2013 expression = self._parse_types()
2014 extend_props(self._parse_properties())
2015 else:
2016 expression = self._parse_ddl_select()
2017
2018 # Some dialects also support using a table as an alias instead of a SELECT.
2019 # Here we fall back to this as an alternative.
2020 if not expression and has_alias: 2021 expression = self._try_parse(self._parse_table_parts) 2022 2023 if create_token.token_type == TokenType.TABLE: 2024 # exp.Properties.Location.POST_EXPRESSION 2025 extend_props(self._parse_properties()) 2026 2027 indexes = [] 2028 while True: 2029 index = self._parse_index() 2030 2031 # exp.Properties.Location.POST_INDEX 2032 extend_props(self._parse_properties()) 2033 if not index: 2034 break 2035 else: 2036 self._match(TokenType.COMMA) 2037 indexes.append(index) 2038 elif create_token.token_type == TokenType.VIEW: 2039 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2040 no_schema_binding = True 2041 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2042 extend_props(self._parse_properties()) 2043 2044 shallow = self._match_text_seq("SHALLOW") 2045 2046 if self._match_texts(self.CLONE_KEYWORDS): 2047 copy = self._prev.text.lower() == "copy" 2048 clone = self.expression( 2049 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2050 ) 2051 2052 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2053 return self._parse_as_command(start) 2054 2055 create_kind_text = create_token.text.upper() 2056 return self.expression( 2057 exp.Create, 2058 this=this, 2059 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2060 replace=replace, 2061 refresh=refresh, 2062 unique=unique, 2063 expression=expression, 2064 exists=exists, 2065 properties=properties, 2066 indexes=indexes, 2067 no_schema_binding=no_schema_binding, 2068 begin=begin, 2069 end=end, 2070 clone=clone, 2071 concurrently=concurrently, 2072 clustered=clustered, 2073 ) 2074 2075 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2076 seq = exp.SequenceProperties() 2077 2078 options = [] 2079 index = self._index 2080 2081 while self._curr: 2082 self._match(TokenType.COMMA) 2083 if self._match_text_seq("INCREMENT"): 2084 self._match_text_seq("BY") 2085 self._match_text_seq("=") 2086 seq.set("increment", self._parse_term()) 2087 elif self._match_text_seq("MINVALUE"): 2088 seq.set("minvalue", self._parse_term()) 2089 elif self._match_text_seq("MAXVALUE"): 2090 seq.set("maxvalue", self._parse_term()) 2091 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2092 self._match_text_seq("=") 2093 seq.set("start", self._parse_term()) 2094 elif self._match_text_seq("CACHE"): 2095 # T-SQL allows empty CACHE which is initialized dynamically 2096 seq.set("cache", self._parse_number() or True) 2097 elif self._match_text_seq("OWNED", "BY"): 2098 # "OWNED BY NONE" is the default 2099 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2100 else: 2101 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2102 if opt: 2103 options.append(opt) 2104 else: 2105 break 2106 2107 seq.set("options", options if options else None) 2108 return None if self._index == index else seq 2109 2110 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2111 # only used for teradata currently 2112 self._match(TokenType.COMMA) 2113 2114 kwargs = { 2115 "no": self._match_text_seq("NO"), 2116 "dual": self._match_text_seq("DUAL"), 2117 "before": self._match_text_seq("BEFORE"), 2118 "default": self._match_text_seq("DEFAULT"), 2119 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2120 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2121 "after": self._match_text_seq("AFTER"), 2122 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2123 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2124 } 2125 2126 if self._match_texts(self.PROPERTY_PARSERS): 2127 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2128 try: 2129 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2130 except TypeError: 2131 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2132 2133 return None 2134 2135 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2136 return self._parse_wrapped_csv(self._parse_property) 2137 2138 def _parse_property(self) -> t.Optional[exp.Expression]: 2139 if self._match_texts(self.PROPERTY_PARSERS): 2140 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2141 2142 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2143 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2144 2145 if self._match_text_seq("COMPOUND", "SORTKEY"): 2146 return self._parse_sortkey(compound=True) 2147 2148 if self._match_text_seq("SQL", "SECURITY"): 2149 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2150 2151 index = self._index 2152 key = self._parse_column() 2153 2154 if not self._match(TokenType.EQ): 2155 self._retreat(index) 2156 return self._parse_sequence_properties() 2157 2158 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2159 if isinstance(key, exp.Column): 2160 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2161 2162 value = self._parse_bitwise() or self._parse_var(any_token=True) 2163 2164 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2165 if isinstance(value, exp.Column): 2166 value = exp.var(value.name) 2167 2168 return self.expression(exp.Property, this=key, value=value) 2169 2170 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2171 if self._match_text_seq("BY"): 2172 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2173 2174 self._match(TokenType.ALIAS) 2175 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2176 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2177 2178 return self.expression( 2179 exp.FileFormatProperty, 2180 this=( 2181 self.expression( 2182 exp.InputOutputFormat, 2183 input_format=input_format, 2184 output_format=output_format, 2185 ) 2186 if input_format or output_format 2187 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2188 ), 2189 ) 2190 2191 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2192 field = self._parse_field() 2193 if isinstance(field, exp.Identifier) and not field.quoted: 2194 field = exp.var(field) 2195 2196 return field 2197 2198 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2199 self._match(TokenType.EQ) 2200 self._match(TokenType.ALIAS) 2201 2202 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2203 2204 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2205 properties = [] 2206 while True: 2207 if before: 2208 prop = self._parse_property_before() 2209 else: 2210 prop = self._parse_property() 2211 if not prop: 2212 break 2213 for p in ensure_list(prop): 2214 properties.append(p) 2215 2216 if properties: 2217 return self.expression(exp.Properties, expressions=properties) 2218 2219 return None 2220 2221 
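    # Example (illustrative sketch; assumes the base PROPERTY_PARSERS mapping,
    # in which ENGINE builds an exp.EngineProperty):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ddl = sqlglot.parse_one("CREATE TABLE t (c INT) ENGINE=InnoDB", read="mysql")
    #     props = ddl.args.get("properties")      # exp.Properties built by _parse_properties
    #     engine = ddl.find(exp.EngineProperty)   # one of props.expressions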
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2222 return self.expression( 2223 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2224 ) 2225 2226 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2227 if self._match_texts(("DEFINER", "INVOKER")): 2228 security_specifier = self._prev.text.upper() 2229 return self.expression(exp.SecurityProperty, this=security_specifier) 2230 return None 2231 2232 def _parse_settings_property(self) -> exp.SettingsProperty: 2233 return self.expression( 2234 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2235 ) 2236 2237 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2238 if self._index >= 2: 2239 pre_volatile_token = self._tokens[self._index - 2] 2240 else: 2241 pre_volatile_token = None 2242 2243 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2244 return exp.VolatileProperty() 2245 2246 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2247 2248 def _parse_retention_period(self) -> exp.Var: 2249 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2250 number = self._parse_number() 2251 number_str = f"{number} " if number else "" 2252 unit = self._parse_var(any_token=True) 2253 return exp.var(f"{number_str}{unit}") 2254 2255 def _parse_system_versioning_property( 2256 self, with_: bool = False 2257 ) -> exp.WithSystemVersioningProperty: 2258 self._match(TokenType.EQ) 2259 prop = self.expression( 2260 exp.WithSystemVersioningProperty, 2261 **{ # type: ignore 2262 "on": True, 2263 "with": with_, 2264 }, 2265 ) 2266 2267 if self._match_text_seq("OFF"): 2268 prop.set("on", False) 2269 return prop 2270 2271 self._match(TokenType.ON) 2272 if self._match(TokenType.L_PAREN): 2273 while self._curr and not self._match(TokenType.R_PAREN): 2274 if self._match_text_seq("HISTORY_TABLE", "="): 2275 prop.set("this", self._parse_table_parts()) 2276 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2277 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2278 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2279 prop.set("retention_period", self._parse_retention_period()) 2280 2281 self._match(TokenType.COMMA) 2282 2283 return prop 2284 2285 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2286 self._match(TokenType.EQ) 2287 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2288 prop = self.expression(exp.DataDeletionProperty, on=on) 2289 2290 if self._match(TokenType.L_PAREN): 2291 while self._curr and not self._match(TokenType.R_PAREN): 2292 if self._match_text_seq("FILTER_COLUMN", "="): 2293 prop.set("filter_column", self._parse_column()) 2294 elif self._match_text_seq("RETENTION_PERIOD", "="): 2295 prop.set("retention_period", self._parse_retention_period()) 2296 2297 self._match(TokenType.COMMA) 2298 2299 return prop 2300 2301 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2302 kind = "HASH" 2303 expressions: t.Optional[t.List[exp.Expression]] = None 2304 if self._match_text_seq("BY", "HASH"): 2305 expressions = self._parse_wrapped_csv(self._parse_id_var) 2306 elif self._match_text_seq("BY", "RANDOM"): 2307 kind = "RANDOM" 2308 2309 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2310 buckets: t.Optional[exp.Expression] = None 2311 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2312 buckets = 
self._parse_number() 2313 2314 return self.expression( 2315 exp.DistributedByProperty, 2316 expressions=expressions, 2317 kind=kind, 2318 buckets=buckets, 2319 order=self._parse_order(), 2320 ) 2321 2322 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2323 self._match_text_seq("KEY") 2324 expressions = self._parse_wrapped_id_vars() 2325 return self.expression(expr_type, expressions=expressions) 2326 2327 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2328 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2329 prop = self._parse_system_versioning_property(with_=True) 2330 self._match_r_paren() 2331 return prop 2332 2333 if self._match(TokenType.L_PAREN, advance=False): 2334 return self._parse_wrapped_properties() 2335 2336 if self._match_text_seq("JOURNAL"): 2337 return self._parse_withjournaltable() 2338 2339 if self._match_texts(self.VIEW_ATTRIBUTES): 2340 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2341 2342 if self._match_text_seq("DATA"): 2343 return self._parse_withdata(no=False) 2344 elif self._match_text_seq("NO", "DATA"): 2345 return self._parse_withdata(no=True) 2346 2347 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2348 return self._parse_serde_properties(with_=True) 2349 2350 if self._match(TokenType.SCHEMA): 2351 return self.expression( 2352 exp.WithSchemaBindingProperty, 2353 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2354 ) 2355 2356 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2357 return self.expression( 2358 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2359 ) 2360 2361 if not self._next: 2362 return None 2363 2364 return self._parse_withisolatedloading() 2365 2366 def _parse_procedure_option(self) -> exp.Expression | None: 2367 if self._match_text_seq("EXECUTE", "AS"): 2368 return self.expression( 2369 exp.ExecuteAsProperty, 2370 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2371 or self._parse_string(), 2372 ) 2373 2374 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2375 2376 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2377 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2378 self._match(TokenType.EQ) 2379 2380 user = self._parse_id_var() 2381 self._match(TokenType.PARAMETER) 2382 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2383 2384 if not user or not host: 2385 return None 2386 2387 return exp.DefinerProperty(this=f"{user}@{host}") 2388 2389 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2390 self._match(TokenType.TABLE) 2391 self._match(TokenType.EQ) 2392 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2393 2394 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2395 return self.expression(exp.LogProperty, no=no) 2396 2397 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2398 return self.expression(exp.JournalProperty, **kwargs) 2399 2400 def _parse_checksum(self) -> exp.ChecksumProperty: 2401 self._match(TokenType.EQ) 2402 2403 on = None 2404 if self._match(TokenType.ON): 2405 on = True 2406 elif self._match_text_seq("OFF"): 2407 on = False 2408 2409 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2410 2411 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2412 return self.expression( 2413 exp.Cluster, 2414 expressions=( 2415 
self._parse_wrapped_csv(self._parse_ordered) 2416 if wrapped 2417 else self._parse_csv(self._parse_ordered) 2418 ), 2419 ) 2420 2421 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2422 self._match_text_seq("BY") 2423 2424 self._match_l_paren() 2425 expressions = self._parse_csv(self._parse_column) 2426 self._match_r_paren() 2427 2428 if self._match_text_seq("SORTED", "BY"): 2429 self._match_l_paren() 2430 sorted_by = self._parse_csv(self._parse_ordered) 2431 self._match_r_paren() 2432 else: 2433 sorted_by = None 2434 2435 self._match(TokenType.INTO) 2436 buckets = self._parse_number() 2437 self._match_text_seq("BUCKETS") 2438 2439 return self.expression( 2440 exp.ClusteredByProperty, 2441 expressions=expressions, 2442 sorted_by=sorted_by, 2443 buckets=buckets, 2444 ) 2445 2446 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2447 if not self._match_text_seq("GRANTS"): 2448 self._retreat(self._index - 1) 2449 return None 2450 2451 return self.expression(exp.CopyGrantsProperty) 2452 2453 def _parse_freespace(self) -> exp.FreespaceProperty: 2454 self._match(TokenType.EQ) 2455 return self.expression( 2456 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2457 ) 2458 2459 def _parse_mergeblockratio( 2460 self, no: bool = False, default: bool = False 2461 ) -> exp.MergeBlockRatioProperty: 2462 if self._match(TokenType.EQ): 2463 return self.expression( 2464 exp.MergeBlockRatioProperty, 2465 this=self._parse_number(), 2466 percent=self._match(TokenType.PERCENT), 2467 ) 2468 2469 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2470 2471 def _parse_datablocksize( 2472 self, 2473 default: t.Optional[bool] = None, 2474 minimum: t.Optional[bool] = None, 2475 maximum: t.Optional[bool] = None, 2476 ) -> exp.DataBlocksizeProperty: 2477 self._match(TokenType.EQ) 2478 size = self._parse_number() 2479 2480 units = None 2481 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2482 units = self._prev.text 2483 2484 return self.expression( 2485 exp.DataBlocksizeProperty, 2486 size=size, 2487 units=units, 2488 default=default, 2489 minimum=minimum, 2490 maximum=maximum, 2491 ) 2492 2493 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2494 self._match(TokenType.EQ) 2495 always = self._match_text_seq("ALWAYS") 2496 manual = self._match_text_seq("MANUAL") 2497 never = self._match_text_seq("NEVER") 2498 default = self._match_text_seq("DEFAULT") 2499 2500 autotemp = None 2501 if self._match_text_seq("AUTOTEMP"): 2502 autotemp = self._parse_schema() 2503 2504 return self.expression( 2505 exp.BlockCompressionProperty, 2506 always=always, 2507 manual=manual, 2508 never=never, 2509 default=default, 2510 autotemp=autotemp, 2511 ) 2512 2513 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2514 index = self._index 2515 no = self._match_text_seq("NO") 2516 concurrent = self._match_text_seq("CONCURRENT") 2517 2518 if not self._match_text_seq("ISOLATED", "LOADING"): 2519 self._retreat(index) 2520 return None 2521 2522 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2523 return self.expression( 2524 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2525 ) 2526 2527 def _parse_locking(self) -> exp.LockingProperty: 2528 if self._match(TokenType.TABLE): 2529 kind = "TABLE" 2530 elif self._match(TokenType.VIEW): 2531 kind = "VIEW" 2532 elif self._match(TokenType.ROW): 2533 kind = "ROW" 2534 elif 
self._match_text_seq("DATABASE"): 2535 kind = "DATABASE" 2536 else: 2537 kind = None 2538 2539 if kind in ("DATABASE", "TABLE", "VIEW"): 2540 this = self._parse_table_parts() 2541 else: 2542 this = None 2543 2544 if self._match(TokenType.FOR): 2545 for_or_in = "FOR" 2546 elif self._match(TokenType.IN): 2547 for_or_in = "IN" 2548 else: 2549 for_or_in = None 2550 2551 if self._match_text_seq("ACCESS"): 2552 lock_type = "ACCESS" 2553 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2554 lock_type = "EXCLUSIVE" 2555 elif self._match_text_seq("SHARE"): 2556 lock_type = "SHARE" 2557 elif self._match_text_seq("READ"): 2558 lock_type = "READ" 2559 elif self._match_text_seq("WRITE"): 2560 lock_type = "WRITE" 2561 elif self._match_text_seq("CHECKSUM"): 2562 lock_type = "CHECKSUM" 2563 else: 2564 lock_type = None 2565 2566 override = self._match_text_seq("OVERRIDE") 2567 2568 return self.expression( 2569 exp.LockingProperty, 2570 this=this, 2571 kind=kind, 2572 for_or_in=for_or_in, 2573 lock_type=lock_type, 2574 override=override, 2575 ) 2576 2577 def _parse_partition_by(self) -> t.List[exp.Expression]: 2578 if self._match(TokenType.PARTITION_BY): 2579 return self._parse_csv(self._parse_assignment) 2580 return [] 2581 2582 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2583 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2584 if self._match_text_seq("MINVALUE"): 2585 return exp.var("MINVALUE") 2586 if self._match_text_seq("MAXVALUE"): 2587 return exp.var("MAXVALUE") 2588 return self._parse_bitwise() 2589 2590 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2591 expression = None 2592 from_expressions = None 2593 to_expressions = None 2594 2595 if self._match(TokenType.IN): 2596 this = self._parse_wrapped_csv(self._parse_bitwise) 2597 elif self._match(TokenType.FROM): 2598 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2599 self._match_text_seq("TO") 2600 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2601 elif self._match_text_seq("WITH", "(", "MODULUS"): 2602 this = self._parse_number() 2603 self._match_text_seq(",", "REMAINDER") 2604 expression = self._parse_number() 2605 self._match_r_paren() 2606 else: 2607 self.raise_error("Failed to parse partition bound spec.") 2608 2609 return self.expression( 2610 exp.PartitionBoundSpec, 2611 this=this, 2612 expression=expression, 2613 from_expressions=from_expressions, 2614 to_expressions=to_expressions, 2615 ) 2616 2617 # https://www.postgresql.org/docs/current/sql-createtable.html 2618 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2619 if not self._match_text_seq("OF"): 2620 self._retreat(self._index - 1) 2621 return None 2622 2623 this = self._parse_table(schema=True) 2624 2625 if self._match(TokenType.DEFAULT): 2626 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2627 elif self._match_text_seq("FOR", "VALUES"): 2628 expression = self._parse_partition_bound_spec() 2629 else: 2630 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2631 2632 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2633 2634 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2635 self._match(TokenType.EQ) 2636 return self.expression( 2637 exp.PartitionedByProperty, 2638 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2639 ) 2640 2641 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2642 if self._match_text_seq("AND", "STATISTICS"): 2643 
statistics = True 2644 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2645 statistics = False 2646 else: 2647 statistics = None 2648 2649 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2650 2651 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2652 if self._match_text_seq("SQL"): 2653 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2654 return None 2655 2656 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2657 if self._match_text_seq("SQL", "DATA"): 2658 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2659 return None 2660 2661 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2662 if self._match_text_seq("PRIMARY", "INDEX"): 2663 return exp.NoPrimaryIndexProperty() 2664 if self._match_text_seq("SQL"): 2665 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2666 return None 2667 2668 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2669 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2670 return exp.OnCommitProperty() 2671 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2672 return exp.OnCommitProperty(delete=True) 2673 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2674 2675 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2676 if self._match_text_seq("SQL", "DATA"): 2677 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2678 return None 2679 2680 def _parse_distkey(self) -> exp.DistKeyProperty: 2681 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2682 2683 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2684 table = self._parse_table(schema=True) 2685 2686 options = [] 2687 while self._match_texts(("INCLUDING", "EXCLUDING")): 2688 this = self._prev.text.upper() 2689 2690 id_var = self._parse_id_var() 2691 if not id_var: 2692 return None 2693 2694 options.append( 2695 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2696 ) 2697 2698 return self.expression(exp.LikeProperty, this=table, expressions=options) 2699 2700 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2701 return self.expression( 2702 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2703 ) 2704 2705 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2706 self._match(TokenType.EQ) 2707 return self.expression( 2708 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2709 ) 2710 2711 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2712 self._match_text_seq("WITH", "CONNECTION") 2713 return self.expression( 2714 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2715 ) 2716 2717 def _parse_returns(self) -> exp.ReturnsProperty: 2718 value: t.Optional[exp.Expression] 2719 null = None 2720 is_table = self._match(TokenType.TABLE) 2721 2722 if is_table: 2723 if self._match(TokenType.LT): 2724 value = self.expression( 2725 exp.Schema, 2726 this="TABLE", 2727 expressions=self._parse_csv(self._parse_struct_types), 2728 ) 2729 if not self._match(TokenType.GT): 2730 self.raise_error("Expecting >") 2731 else: 2732 value = self._parse_schema(exp.var("TABLE")) 2733 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2734 null = True 2735 value = None 2736 else: 2737 value = self._parse_types() 2738 2739 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2740 2741 def _parse_describe(self) -> exp.Describe: 2742 kind = self._match_set(self.CREATABLES) and self._prev.text 2743 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2744 if self._match(TokenType.DOT): 2745 style = None 2746 self._retreat(self._index - 2) 2747 2748 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2749 2750 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2751 this = self._parse_statement() 2752 else: 2753 this = self._parse_table(schema=True) 2754 2755 properties = self._parse_properties() 2756 expressions = properties.expressions if properties else None 2757 partition = self._parse_partition() 2758 return self.expression( 2759 exp.Describe, 2760 this=this, 2761 style=style, 2762 kind=kind, 2763 expressions=expressions, 2764 partition=partition, 2765 format=format, 2766 ) 2767 2768 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2769 kind = self._prev.text.upper() 2770 expressions = [] 2771 2772 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2773 if self._match(TokenType.WHEN): 2774 expression = self._parse_disjunction() 2775 self._match(TokenType.THEN) 2776 else: 2777 expression = None 2778 2779 else_ = self._match(TokenType.ELSE) 2780 2781 if not self._match(TokenType.INTO): 2782 return None 2783 2784 return self.expression( 2785 exp.ConditionalInsert, 2786 this=self.expression( 2787 exp.Insert, 2788 this=self._parse_table(schema=True), 2789 expression=self._parse_derived_table_values(), 2790 ), 2791 expression=expression, 2792 else_=else_, 2793 ) 2794 2795 expression = parse_conditional_insert() 2796 while expression is not None: 2797 expressions.append(expression) 2798 expression = parse_conditional_insert() 2799 2800 return self.expression( 2801 exp.MultitableInserts, 2802 kind=kind, 2803 comments=comments, 2804 expressions=expressions, 2805 source=self._parse_table(), 2806 ) 2807 2808 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2809 comments = [] 2810 hint = self._parse_hint() 2811 overwrite = self._match(TokenType.OVERWRITE) 2812 ignore = self._match(TokenType.IGNORE) 2813 local = self._match_text_seq("LOCAL") 2814 alternative = None 2815 is_function = None 2816 2817 if self._match_text_seq("DIRECTORY"): 2818 this: t.Optional[exp.Expression] = self.expression( 2819 exp.Directory, 2820 this=self._parse_var_or_string(), 2821 local=local, 2822 row_format=self._parse_row_format(match_row=True), 2823 ) 2824 else: 2825 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2826 comments += ensure_list(self._prev_comments) 2827 return self._parse_multitable_inserts(comments) 2828 2829 if self._match(TokenType.OR): 2830 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2831 2832 self._match(TokenType.INTO) 2833 comments += ensure_list(self._prev_comments) 2834 self._match(TokenType.TABLE) 2835 is_function = self._match(TokenType.FUNCTION) 2836 2837 this = ( 2838 self._parse_table(schema=True, parse_partition=True) 2839 if not is_function 2840 else self._parse_function() 2841 ) 2842 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2843 this.set("alias", self._parse_table_alias()) 2844 2845 returning = self._parse_returning() 2846 2847 return self.expression( 2848 exp.Insert, 2849 comments=comments, 2850 hint=hint, 2851 is_function=is_function, 2852 this=this, 
2853 stored=self._match_text_seq("STORED") and self._parse_stored(), 2854 by_name=self._match_text_seq("BY", "NAME"), 2855 exists=self._parse_exists(), 2856 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2857 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2858 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2859 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2860 conflict=self._parse_on_conflict(), 2861 returning=returning or self._parse_returning(), 2862 overwrite=overwrite, 2863 alternative=alternative, 2864 ignore=ignore, 2865 source=self._match(TokenType.TABLE) and self._parse_table(), 2866 ) 2867 2868 def _parse_kill(self) -> exp.Kill: 2869 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2870 2871 return self.expression( 2872 exp.Kill, 2873 this=self._parse_primary(), 2874 kind=kind, 2875 ) 2876 2877 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2878 conflict = self._match_text_seq("ON", "CONFLICT") 2879 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2880 2881 if not conflict and not duplicate: 2882 return None 2883 2884 conflict_keys = None 2885 constraint = None 2886 2887 if conflict: 2888 if self._match_text_seq("ON", "CONSTRAINT"): 2889 constraint = self._parse_id_var() 2890 elif self._match(TokenType.L_PAREN): 2891 conflict_keys = self._parse_csv(self._parse_id_var) 2892 self._match_r_paren() 2893 2894 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2895 if self._prev.token_type == TokenType.UPDATE: 2896 self._match(TokenType.SET) 2897 expressions = self._parse_csv(self._parse_equality) 2898 else: 2899 expressions = None 2900 2901 return self.expression( 2902 exp.OnConflict, 2903 duplicate=duplicate, 2904 expressions=expressions, 2905 action=action, 2906 conflict_keys=conflict_keys, 2907 constraint=constraint, 2908 where=self._parse_where(), 2909 ) 2910 2911 def _parse_returning(self) -> t.Optional[exp.Returning]: 2912 if not self._match(TokenType.RETURNING): 2913 return None 2914 return self.expression( 2915 exp.Returning, 2916 expressions=self._parse_csv(self._parse_expression), 2917 into=self._match(TokenType.INTO) and self._parse_table_part(), 2918 ) 2919 2920 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2921 if not self._match(TokenType.FORMAT): 2922 return None 2923 return self._parse_row_format() 2924 2925 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2926 index = self._index 2927 with_ = with_ or self._match_text_seq("WITH") 2928 2929 if not self._match(TokenType.SERDE_PROPERTIES): 2930 self._retreat(index) 2931 return None 2932 return self.expression( 2933 exp.SerdeProperties, 2934 **{ # type: ignore 2935 "expressions": self._parse_wrapped_properties(), 2936 "with": with_, 2937 }, 2938 ) 2939 2940 def _parse_row_format( 2941 self, match_row: bool = False 2942 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2943 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2944 return None 2945 2946 if self._match_text_seq("SERDE"): 2947 this = self._parse_string() 2948 2949 serde_properties = self._parse_serde_properties() 2950 2951 return self.expression( 2952 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2953 ) 2954 2955 self._match_text_seq("DELIMITED") 2956 2957 kwargs = {} 2958 2959 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2960 kwargs["fields"] = self._parse_string() 2961 if self._match_text_seq("ESCAPED", "BY"): 2962 kwargs["escaped"] = self._parse_string() 2963 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2964 kwargs["collection_items"] = self._parse_string() 2965 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2966 kwargs["map_keys"] = self._parse_string() 2967 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2968 kwargs["lines"] = self._parse_string() 2969 if self._match_text_seq("NULL", "DEFINED", "AS"): 2970 kwargs["null"] = self._parse_string() 2971 2972 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2973 2974 def _parse_load(self) -> exp.LoadData | exp.Command: 2975 if self._match_text_seq("DATA"): 2976 local = self._match_text_seq("LOCAL") 2977 self._match_text_seq("INPATH") 2978 inpath = self._parse_string() 2979 overwrite = self._match(TokenType.OVERWRITE) 2980 self._match_pair(TokenType.INTO, TokenType.TABLE) 2981 2982 return self.expression( 2983 exp.LoadData, 2984 this=self._parse_table(schema=True), 2985 local=local, 2986 overwrite=overwrite, 2987 inpath=inpath, 2988 partition=self._parse_partition(), 2989 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2990 serde=self._match_text_seq("SERDE") and self._parse_string(), 2991 ) 2992 return self._parse_as_command(self._prev) 2993 2994 def _parse_delete(self) -> exp.Delete: 2995 # This handles MySQL's "Multiple-Table Syntax" 2996 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2997 tables = None 2998 if not self._match(TokenType.FROM, advance=False): 2999 tables = self._parse_csv(self._parse_table) or None 3000 3001 returning = self._parse_returning() 3002 3003 return self.expression( 3004 exp.Delete, 3005 tables=tables, 3006 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3007 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3008 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3009 where=self._parse_where(), 3010 returning=returning or self._parse_returning(), 3011 limit=self._parse_limit(), 3012 ) 3013 3014 def _parse_update(self) -> exp.Update: 3015 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3016 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3017 returning = self._parse_returning() 3018 return self.expression( 3019 exp.Update, 3020 **{ # type: ignore 3021 "this": this, 3022 "expressions": expressions, 3023 "from": self._parse_from(joins=True), 3024 "where": self._parse_where(), 3025 "returning": returning or self._parse_returning(), 3026 "order": self._parse_order(), 3027 "limit": self._parse_limit(), 3028 }, 3029 ) 3030 3031 def _parse_use(self) -> exp.Use: 3032 return self.expression( 3033 exp.Use, 3034 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3035 this=self._parse_table(schema=False), 3036 ) 3037 3038 def _parse_uncache(self) -> exp.Uncache: 3039 if not self._match(TokenType.TABLE): 3040 self.raise_error("Expecting TABLE after UNCACHE") 3041 3042 return self.expression( 3043 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3044 ) 3045 3046 def _parse_cache(self) -> exp.Cache: 3047 lazy = self._match_text_seq("LAZY") 3048 self._match(TokenType.TABLE) 3049 table = self._parse_table(schema=True) 3050 3051 options = [] 3052 if self._match_text_seq("OPTIONS"): 3053 self._match_l_paren() 3054 k = 
self._parse_string()
3055 self._match(TokenType.EQ)
3056 v = self._parse_string()
3057 options = [k, v]
3058 self._match_r_paren()
3059
3060 self._match(TokenType.ALIAS)
3061 return self.expression(
3062 exp.Cache,
3063 this=table,
3064 lazy=lazy,
3065 options=options,
3066 expression=self._parse_select(nested=True),
3067 )
3068
3069 def _parse_partition(self) -> t.Optional[exp.Partition]:
3070 if not self._match_texts(self.PARTITION_KEYWORDS):
3071 return None
3072
3073 return self.expression(
3074 exp.Partition,
3075 subpartition=self._prev.text.upper() == "SUBPARTITION",
3076 expressions=self._parse_wrapped_csv(self._parse_assignment),
3077 )
3078
3079 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
3080 def _parse_value_expression() -> t.Optional[exp.Expression]:
3081 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
3082 return exp.var(self._prev.text.upper())
3083 return self._parse_expression()
3084
3085 if self._match(TokenType.L_PAREN):
3086 expressions = self._parse_csv(_parse_value_expression)
3087 self._match_r_paren()
3088 return self.expression(exp.Tuple, expressions=expressions)
3089
3090 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
3091 expression = self._parse_expression()
3092 if expression:
3093 return self.expression(exp.Tuple, expressions=[expression])
3094 return None
3095
3096 def _parse_projections(self) -> t.List[exp.Expression]:
3097 return self._parse_expressions()
3098
3099 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
3100 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
3101 this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
3102 is_unpivot=self._prev.token_type == TokenType.UNPIVOT
3103 )
3104 elif self._match(TokenType.FROM):
3105 from_ = self._parse_from(skip_from_token=True)
3106 # Support parentheses for duckdb FROM-first syntax
3107 select = self._parse_select()
3108 if select:
3109 select.set("from", from_)
3110 this = select
3111 else:
3112 this = exp.select("*").from_(t.cast(exp.From, from_))
3113 else:
3114 this = (
3115 self._parse_table()
3116 if table
3117 else self._parse_select(nested=True, parse_set_operation=False)
3118 )
3119
3120 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
3121 # in case a modifier (e.g.
join) is following 3122 if table and isinstance(this, exp.Values) and this.alias: 3123 alias = this.args["alias"].pop() 3124 this = exp.Table(this=this, alias=alias) 3125 3126 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3127 3128 return this 3129 3130 def _parse_select( 3131 self, 3132 nested: bool = False, 3133 table: bool = False, 3134 parse_subquery_alias: bool = True, 3135 parse_set_operation: bool = True, 3136 ) -> t.Optional[exp.Expression]: 3137 cte = self._parse_with() 3138 3139 if cte: 3140 this = self._parse_statement() 3141 3142 if not this: 3143 self.raise_error("Failed to parse any statement following CTE") 3144 return cte 3145 3146 if "with" in this.arg_types: 3147 this.set("with", cte) 3148 else: 3149 self.raise_error(f"{this.key} does not support CTE") 3150 this = cte 3151 3152 return this 3153 3154 # duckdb supports leading with FROM x 3155 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3156 3157 if self._match(TokenType.SELECT): 3158 comments = self._prev_comments 3159 3160 hint = self._parse_hint() 3161 3162 if self._next and not self._next.token_type == TokenType.DOT: 3163 all_ = self._match(TokenType.ALL) 3164 distinct = self._match_set(self.DISTINCT_TOKENS) 3165 else: 3166 all_, distinct = None, None 3167 3168 kind = ( 3169 self._match(TokenType.ALIAS) 3170 and self._match_texts(("STRUCT", "VALUE")) 3171 and self._prev.text.upper() 3172 ) 3173 3174 if distinct: 3175 distinct = self.expression( 3176 exp.Distinct, 3177 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3178 ) 3179 3180 if all_ and distinct: 3181 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3182 3183 operation_modifiers = [] 3184 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3185 operation_modifiers.append(exp.var(self._prev.text.upper())) 3186 3187 limit = self._parse_limit(top=True) 3188 projections = self._parse_projections() 3189 3190 this = self.expression( 3191 exp.Select, 3192 kind=kind, 3193 hint=hint, 3194 distinct=distinct, 3195 expressions=projections, 3196 limit=limit, 3197 operation_modifiers=operation_modifiers or None, 3198 ) 3199 this.comments = comments 3200 3201 into = self._parse_into() 3202 if into: 3203 this.set("into", into) 3204 3205 if not from_: 3206 from_ = self._parse_from() 3207 3208 if from_: 3209 this.set("from", from_) 3210 3211 this = self._parse_query_modifiers(this) 3212 elif (table or nested) and self._match(TokenType.L_PAREN): 3213 this = self._parse_wrapped_select(table=table) 3214 3215 # We return early here so that the UNION isn't attached to the subquery by the 3216 # following call to _parse_set_operations, but instead becomes the parent node 3217 self._match_r_paren() 3218 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3219 elif self._match(TokenType.VALUES, advance=False): 3220 this = self._parse_derived_table_values() 3221 elif from_: 3222 this = exp.select("*").from_(from_.this, copy=False) 3223 elif self._match(TokenType.SUMMARIZE): 3224 table = self._match(TokenType.TABLE) 3225 this = self._parse_select() or self._parse_string() or self._parse_table() 3226 return self.expression(exp.Summarize, this=this, table=table) 3227 elif self._match(TokenType.DESCRIBE): 3228 this = self._parse_describe() 3229 elif self._match_text_seq("STREAM"): 3230 this = self._parse_function() 3231 if this: 3232 this = self.expression(exp.Stream, this=this) 3233 else: 3234 self._retreat(self._index - 1) 3235 else: 3236 this = None 
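        # Any trailing set operator is folded in here, so e.g. "SELECT 1 UNION SELECT 2"
        # parses to an exp.Union root whose "this" and "expression" args hold the two
        # SELECT trees.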
3237 3238 return self._parse_set_operations(this) if parse_set_operation else this 3239 3240 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3241 self._match_text_seq("SEARCH") 3242 3243 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3244 3245 if not kind: 3246 return None 3247 3248 self._match_text_seq("FIRST", "BY") 3249 3250 return self.expression( 3251 exp.RecursiveWithSearch, 3252 kind=kind, 3253 this=self._parse_id_var(), 3254 expression=self._match_text_seq("SET") and self._parse_id_var(), 3255 using=self._match_text_seq("USING") and self._parse_id_var(), 3256 ) 3257 3258 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3259 if not skip_with_token and not self._match(TokenType.WITH): 3260 return None 3261 3262 comments = self._prev_comments 3263 recursive = self._match(TokenType.RECURSIVE) 3264 3265 last_comments = None 3266 expressions = [] 3267 while True: 3268 cte = self._parse_cte() 3269 if isinstance(cte, exp.CTE): 3270 expressions.append(cte) 3271 if last_comments: 3272 cte.add_comments(last_comments) 3273 3274 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3275 break 3276 else: 3277 self._match(TokenType.WITH) 3278 3279 last_comments = self._prev_comments 3280 3281 return self.expression( 3282 exp.With, 3283 comments=comments, 3284 expressions=expressions, 3285 recursive=recursive, 3286 search=self._parse_recursive_with_search(), 3287 ) 3288 3289 def _parse_cte(self) -> t.Optional[exp.CTE]: 3290 index = self._index 3291 3292 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3293 if not alias or not alias.this: 3294 self.raise_error("Expected CTE to have alias") 3295 3296 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3297 self._retreat(index) 3298 return None 3299 3300 comments = self._prev_comments 3301 3302 if self._match_text_seq("NOT", "MATERIALIZED"): 3303 materialized = False 3304 elif self._match_text_seq("MATERIALIZED"): 3305 materialized = True 3306 else: 3307 materialized = None 3308 3309 cte = self.expression( 3310 exp.CTE, 3311 this=self._parse_wrapped(self._parse_statement), 3312 alias=alias, 3313 materialized=materialized, 3314 comments=comments, 3315 ) 3316 3317 if isinstance(cte.this, exp.Values): 3318 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3319 3320 return cte 3321 3322 def _parse_table_alias( 3323 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3324 ) -> t.Optional[exp.TableAlias]: 3325 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3326 # so this section tries to parse the clause version and if it fails, it treats the token 3327 # as an identifier (alias) 3328 if self._can_parse_limit_or_offset(): 3329 return None 3330 3331 any_token = self._match(TokenType.ALIAS) 3332 alias = ( 3333 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3334 or self._parse_string_as_identifier() 3335 ) 3336 3337 index = self._index 3338 if self._match(TokenType.L_PAREN): 3339 columns = self._parse_csv(self._parse_function_parameter) 3340 self._match_r_paren() if columns else self._retreat(index) 3341 else: 3342 columns = None 3343 3344 if not alias and not columns: 3345 return None 3346 3347 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3348 3349 # We bubble up comments from the Identifier to the TableAlias 3350 if isinstance(alias, exp.Identifier): 3351 
table_alias.add_comments(alias.pop_comments()) 3352 3353 return table_alias 3354 3355 def _parse_subquery( 3356 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3357 ) -> t.Optional[exp.Subquery]: 3358 if not this: 3359 return None 3360 3361 return self.expression( 3362 exp.Subquery, 3363 this=this, 3364 pivots=self._parse_pivots(), 3365 alias=self._parse_table_alias() if parse_alias else None, 3366 sample=self._parse_table_sample(), 3367 ) 3368 3369 def _implicit_unnests_to_explicit(self, this: E) -> E: 3370 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3371 3372 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3373 for i, join in enumerate(this.args.get("joins") or []): 3374 table = join.this 3375 normalized_table = table.copy() 3376 normalized_table.meta["maybe_column"] = True 3377 normalized_table = _norm(normalized_table, dialect=self.dialect) 3378 3379 if isinstance(table, exp.Table) and not join.args.get("on"): 3380 if normalized_table.parts[0].name in refs: 3381 table_as_column = table.to_column() 3382 unnest = exp.Unnest(expressions=[table_as_column]) 3383 3384 # Table.to_column creates a parent Alias node that we want to convert to 3385 # a TableAlias and attach to the Unnest, so it matches the parser's output 3386 if isinstance(table.args.get("alias"), exp.TableAlias): 3387 table_as_column.replace(table_as_column.this) 3388 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3389 3390 table.replace(unnest) 3391 3392 refs.add(normalized_table.alias_or_name) 3393 3394 return this 3395 3396 def _parse_query_modifiers( 3397 self, this: t.Optional[exp.Expression] 3398 ) -> t.Optional[exp.Expression]: 3399 if isinstance(this, self.MODIFIABLES): 3400 for join in self._parse_joins(): 3401 this.append("joins", join) 3402 for lateral in iter(self._parse_lateral, None): 3403 this.append("laterals", lateral) 3404 3405 while True: 3406 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3407 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3408 key, expression = parser(self) 3409 3410 if expression: 3411 this.set(key, expression) 3412 if key == "limit": 3413 offset = expression.args.pop("offset", None) 3414 3415 if offset: 3416 offset = exp.Offset(expression=offset) 3417 this.set("offset", offset) 3418 3419 limit_by_expressions = expression.expressions 3420 expression.set("expressions", None) 3421 offset.set("expressions", limit_by_expressions) 3422 continue 3423 break 3424 3425 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3426 this = self._implicit_unnests_to_explicit(this) 3427 3428 return this 3429 3430 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3431 start = self._curr 3432 while self._curr: 3433 self._advance() 3434 3435 end = self._tokens[self._index - 1] 3436 return exp.Hint(expressions=[self._find_sql(start, end)]) 3437 3438 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3439 return self._parse_function_call() 3440 3441 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3442 start_index = self._index 3443 should_fallback_to_string = False 3444 3445 hints = [] 3446 try: 3447 for hint in iter( 3448 lambda: self._parse_csv( 3449 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3450 ), 3451 [], 3452 ): 3453 hints.extend(hint) 3454 except ParseError: 3455 should_fallback_to_string = True 3456 3457 if should_fallback_to_string or self._curr: 3458 self._retreat(start_index) 
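        # Either an individual hint failed to parse or unconsumed tokens remain, so the
        # whole hint body is preserved verbatim as a single string expression; e.g. a
        # vendor-specific hint whose body isn't valid function/variable syntax still
        # round-trips unchanged.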
3459 return self._parse_hint_fallback_to_string() 3460 3461 return self.expression(exp.Hint, expressions=hints) 3462 3463 def _parse_hint(self) -> t.Optional[exp.Hint]: 3464 if self._match(TokenType.HINT) and self._prev_comments: 3465 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3466 3467 return None 3468 3469 def _parse_into(self) -> t.Optional[exp.Into]: 3470 if not self._match(TokenType.INTO): 3471 return None 3472 3473 temp = self._match(TokenType.TEMPORARY) 3474 unlogged = self._match_text_seq("UNLOGGED") 3475 self._match(TokenType.TABLE) 3476 3477 return self.expression( 3478 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3479 ) 3480 3481 def _parse_from( 3482 self, joins: bool = False, skip_from_token: bool = False 3483 ) -> t.Optional[exp.From]: 3484 if not skip_from_token and not self._match(TokenType.FROM): 3485 return None 3486 3487 return self.expression( 3488 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3489 ) 3490 3491 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3492 return self.expression( 3493 exp.MatchRecognizeMeasure, 3494 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3495 this=self._parse_expression(), 3496 ) 3497 3498 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3499 if not self._match(TokenType.MATCH_RECOGNIZE): 3500 return None 3501 3502 self._match_l_paren() 3503 3504 partition = self._parse_partition_by() 3505 order = self._parse_order() 3506 3507 measures = ( 3508 self._parse_csv(self._parse_match_recognize_measure) 3509 if self._match_text_seq("MEASURES") 3510 else None 3511 ) 3512 3513 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3514 rows = exp.var("ONE ROW PER MATCH") 3515 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3516 text = "ALL ROWS PER MATCH" 3517 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3518 text += " SHOW EMPTY MATCHES" 3519 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3520 text += " OMIT EMPTY MATCHES" 3521 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3522 text += " WITH UNMATCHED ROWS" 3523 rows = exp.var(text) 3524 else: 3525 rows = None 3526 3527 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3528 text = "AFTER MATCH SKIP" 3529 if self._match_text_seq("PAST", "LAST", "ROW"): 3530 text += " PAST LAST ROW" 3531 elif self._match_text_seq("TO", "NEXT", "ROW"): 3532 text += " TO NEXT ROW" 3533 elif self._match_text_seq("TO", "FIRST"): 3534 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3535 elif self._match_text_seq("TO", "LAST"): 3536 text += f" TO LAST {self._advance_any().text}" # type: ignore 3537 after = exp.var(text) 3538 else: 3539 after = None 3540 3541 if self._match_text_seq("PATTERN"): 3542 self._match_l_paren() 3543 3544 if not self._curr: 3545 self.raise_error("Expecting )", self._curr) 3546 3547 paren = 1 3548 start = self._curr 3549 3550 while self._curr and paren > 0: 3551 if self._curr.token_type == TokenType.L_PAREN: 3552 paren += 1 3553 if self._curr.token_type == TokenType.R_PAREN: 3554 paren -= 1 3555 3556 end = self._prev 3557 self._advance() 3558 3559 if paren > 0: 3560 self.raise_error("Expecting )", self._curr) 3561 3562 pattern = exp.var(self._find_sql(start, end)) 3563 else: 3564 pattern = None 3565 3566 define = ( 3567 self._parse_csv(self._parse_name_as_expression) 3568 if self._match_text_seq("DEFINE") 3569 else None 3570 ) 3571 3572 self._match_r_paren() 3573 
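        # All MATCH_RECOGNIZE sub-clauses have now been consumed. For reference, a query
        # shaped roughly like the following (a sketch) exercises every branch above:
        #
        #   SELECT * FROM t MATCH_RECOGNIZE (
        #     PARTITION BY a ORDER BY b
        #     MEASURES FINAL x AS m
        #     ALL ROWS PER MATCH OMIT EMPTY MATCHES
        #     AFTER MATCH SKIP PAST LAST ROW
        #     PATTERN (A B*)
        #     DEFINE A AS price > 10
        #   ) AS mr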
3574 return self.expression( 3575 exp.MatchRecognize, 3576 partition_by=partition, 3577 order=order, 3578 measures=measures, 3579 rows=rows, 3580 after=after, 3581 pattern=pattern, 3582 define=define, 3583 alias=self._parse_table_alias(), 3584 ) 3585 3586 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3587 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3588 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3589 cross_apply = False 3590 3591 if cross_apply is not None: 3592 this = self._parse_select(table=True) 3593 view = None 3594 outer = None 3595 elif self._match(TokenType.LATERAL): 3596 this = self._parse_select(table=True) 3597 view = self._match(TokenType.VIEW) 3598 outer = self._match(TokenType.OUTER) 3599 else: 3600 return None 3601 3602 if not this: 3603 this = ( 3604 self._parse_unnest() 3605 or self._parse_function() 3606 or self._parse_id_var(any_token=False) 3607 ) 3608 3609 while self._match(TokenType.DOT): 3610 this = exp.Dot( 3611 this=this, 3612 expression=self._parse_function() or self._parse_id_var(any_token=False), 3613 ) 3614 3615 ordinality: t.Optional[bool] = None 3616 3617 if view: 3618 table = self._parse_id_var(any_token=False) 3619 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3620 table_alias: t.Optional[exp.TableAlias] = self.expression( 3621 exp.TableAlias, this=table, columns=columns 3622 ) 3623 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3624 # We move the alias from the lateral's child node to the lateral itself 3625 table_alias = this.args["alias"].pop() 3626 else: 3627 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3628 table_alias = self._parse_table_alias() 3629 3630 return self.expression( 3631 exp.Lateral, 3632 this=this, 3633 view=view, 3634 outer=outer, 3635 alias=table_alias, 3636 cross_apply=cross_apply, 3637 ordinality=ordinality, 3638 ) 3639 3640 def _parse_join_parts( 3641 self, 3642 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3643 return ( 3644 self._match_set(self.JOIN_METHODS) and self._prev, 3645 self._match_set(self.JOIN_SIDES) and self._prev, 3646 self._match_set(self.JOIN_KINDS) and self._prev, 3647 ) 3648 3649 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3650 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3651 this = self._parse_column() 3652 if isinstance(this, exp.Column): 3653 return this.this 3654 return this 3655 3656 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3657 3658 def _parse_join( 3659 self, skip_join_token: bool = False, parse_bracket: bool = False 3660 ) -> t.Optional[exp.Join]: 3661 if self._match(TokenType.COMMA): 3662 table = self._try_parse(self._parse_table) 3663 if table: 3664 return self.expression(exp.Join, this=table) 3665 return None 3666 3667 index = self._index 3668 method, side, kind = self._parse_join_parts() 3669 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3670 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3671 3672 if not skip_join_token and not join: 3673 self._retreat(index) 3674 kind = None 3675 method = None 3676 side = None 3677 3678 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3679 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3680 3681 if not skip_join_token and not join and not outer_apply and not cross_apply: 3682 return None 3683 3684 kwargs: 
t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3685 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3686 kwargs["expressions"] = self._parse_csv( 3687 lambda: self._parse_table(parse_bracket=parse_bracket) 3688 ) 3689 3690 if method: 3691 kwargs["method"] = method.text 3692 if side: 3693 kwargs["side"] = side.text 3694 if kind: 3695 kwargs["kind"] = kind.text 3696 if hint: 3697 kwargs["hint"] = hint 3698 3699 if self._match(TokenType.MATCH_CONDITION): 3700 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3701 3702 if self._match(TokenType.ON): 3703 kwargs["on"] = self._parse_assignment() 3704 elif self._match(TokenType.USING): 3705 kwargs["using"] = self._parse_using_identifiers() 3706 elif ( 3707 not (outer_apply or cross_apply) 3708 and not isinstance(kwargs["this"], exp.Unnest) 3709 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3710 ): 3711 index = self._index 3712 joins: t.Optional[list] = list(self._parse_joins()) 3713 3714 if joins and self._match(TokenType.ON): 3715 kwargs["on"] = self._parse_assignment() 3716 elif joins and self._match(TokenType.USING): 3717 kwargs["using"] = self._parse_using_identifiers() 3718 else: 3719 joins = None 3720 self._retreat(index) 3721 3722 kwargs["this"].set("joins", joins if joins else None) 3723 3724 comments = [c for token in (method, side, kind) if token for c in token.comments] 3725 return self.expression(exp.Join, comments=comments, **kwargs) 3726 3727 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3728 this = self._parse_assignment() 3729 3730 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3731 return this 3732 3733 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3734 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3735 3736 return this 3737 3738 def _parse_index_params(self) -> exp.IndexParameters: 3739 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3740 3741 if self._match(TokenType.L_PAREN, advance=False): 3742 columns = self._parse_wrapped_csv(self._parse_with_operator) 3743 else: 3744 columns = None 3745 3746 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3747 partition_by = self._parse_partition_by() 3748 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3749 tablespace = ( 3750 self._parse_var(any_token=True) 3751 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3752 else None 3753 ) 3754 where = self._parse_where() 3755 3756 on = self._parse_field() if self._match(TokenType.ON) else None 3757 3758 return self.expression( 3759 exp.IndexParameters, 3760 using=using, 3761 columns=columns, 3762 include=include, 3763 partition_by=partition_by, 3764 where=where, 3765 with_storage=with_storage, 3766 tablespace=tablespace, 3767 on=on, 3768 ) 3769 3770 def _parse_index( 3771 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3772 ) -> t.Optional[exp.Index]: 3773 if index or anonymous: 3774 unique = None 3775 primary = None 3776 amp = None 3777 3778 self._match(TokenType.ON) 3779 self._match(TokenType.TABLE) # hive 3780 table = self._parse_table_parts(schema=True) 3781 else: 3782 unique = self._match(TokenType.UNIQUE) 3783 primary = self._match_text_seq("PRIMARY") 3784 amp = self._match_text_seq("AMP") 3785 3786 if not self._match(TokenType.INDEX): 3787 return None 3788 3789 index = self._parse_id_var() 3790 table = None 
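        # Both branches converge here: the remaining options are parsed uniformly by
        # _parse_index_params. Illustrative fragment (a sketch):
        #
        #   ... USING btree (col1, col2) INCLUDE (col3) WHERE col1 > 0
        #
        # which populates `using`, `columns`, `include` and `where` respectively.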
3791 3792 params = self._parse_index_params() 3793 3794 return self.expression( 3795 exp.Index, 3796 this=index, 3797 table=table, 3798 unique=unique, 3799 primary=primary, 3800 amp=amp, 3801 params=params, 3802 ) 3803 3804 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3805 hints: t.List[exp.Expression] = [] 3806 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3807 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3808 hints.append( 3809 self.expression( 3810 exp.WithTableHint, 3811 expressions=self._parse_csv( 3812 lambda: self._parse_function() or self._parse_var(any_token=True) 3813 ), 3814 ) 3815 ) 3816 self._match_r_paren() 3817 else: 3818 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3819 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3820 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3821 3822 self._match_set((TokenType.INDEX, TokenType.KEY)) 3823 if self._match(TokenType.FOR): 3824 hint.set("target", self._advance_any() and self._prev.text.upper()) 3825 3826 hint.set("expressions", self._parse_wrapped_id_vars()) 3827 hints.append(hint) 3828 3829 return hints or None 3830 3831 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3832 return ( 3833 (not schema and self._parse_function(optional_parens=False)) 3834 or self._parse_id_var(any_token=False) 3835 or self._parse_string_as_identifier() 3836 or self._parse_placeholder() 3837 ) 3838 3839 def _parse_table_parts( 3840 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3841 ) -> exp.Table: 3842 catalog = None 3843 db = None 3844 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3845 3846 while self._match(TokenType.DOT): 3847 if catalog: 3848 # This allows nesting the table in arbitrarily many dot expressions if needed 3849 table = self.expression( 3850 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3851 ) 3852 else: 3853 catalog = db 3854 db = table 3855 # "" used for tsql FROM a..b case 3856 table = self._parse_table_part(schema=schema) or "" 3857 3858 if ( 3859 wildcard 3860 and self._is_connected() 3861 and (isinstance(table, exp.Identifier) or not table) 3862 and self._match(TokenType.STAR) 3863 ): 3864 if isinstance(table, exp.Identifier): 3865 table.args["this"] += "*" 3866 else: 3867 table = exp.Identifier(this="*") 3868 3869 # We bubble up comments from the Identifier to the Table 3870 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3871 3872 if is_db_reference: 3873 catalog = db 3874 db = table 3875 table = None 3876 3877 if not table and not is_db_reference: 3878 self.raise_error(f"Expected table name but got {self._curr}") 3879 if not db and is_db_reference: 3880 self.raise_error(f"Expected database name but got {self._curr}") 3881 3882 table = self.expression( 3883 exp.Table, 3884 comments=comments, 3885 this=table, 3886 db=db, 3887 catalog=catalog, 3888 ) 3889 3890 changes = self._parse_changes() 3891 if changes: 3892 table.set("changes", changes) 3893 3894 at_before = self._parse_historical_data() 3895 if at_before: 3896 table.set("when", at_before) 3897 3898 pivots = self._parse_pivots() 3899 if pivots: 3900 table.set("pivots", pivots) 3901 3902 return table 3903 3904 def _parse_table( 3905 self, 3906 schema: bool = False, 3907 joins: bool = False, 3908 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3909 parse_bracket: bool = False, 3910 is_db_reference: 
bool = False, 3911 parse_partition: bool = False, 3912 ) -> t.Optional[exp.Expression]: 3913 lateral = self._parse_lateral() 3914 if lateral: 3915 return lateral 3916 3917 unnest = self._parse_unnest() 3918 if unnest: 3919 return unnest 3920 3921 values = self._parse_derived_table_values() 3922 if values: 3923 return values 3924 3925 subquery = self._parse_select(table=True) 3926 if subquery: 3927 if not subquery.args.get("pivots"): 3928 subquery.set("pivots", self._parse_pivots()) 3929 return subquery 3930 3931 bracket = parse_bracket and self._parse_bracket(None) 3932 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3933 3934 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3935 self._parse_table 3936 ) 3937 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3938 3939 only = self._match(TokenType.ONLY) 3940 3941 this = t.cast( 3942 exp.Expression, 3943 bracket 3944 or rows_from 3945 or self._parse_bracket( 3946 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3947 ), 3948 ) 3949 3950 if only: 3951 this.set("only", only) 3952 3953 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3954 self._match_text_seq("*") 3955 3956 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3957 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3958 this.set("partition", self._parse_partition()) 3959 3960 if schema: 3961 return self._parse_schema(this=this) 3962 3963 version = self._parse_version() 3964 3965 if version: 3966 this.set("version", version) 3967 3968 if self.dialect.ALIAS_POST_TABLESAMPLE: 3969 this.set("sample", self._parse_table_sample()) 3970 3971 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3972 if alias: 3973 this.set("alias", alias) 3974 3975 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3976 return self.expression( 3977 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3978 ) 3979 3980 this.set("hints", self._parse_table_hints()) 3981 3982 if not this.args.get("pivots"): 3983 this.set("pivots", self._parse_pivots()) 3984 3985 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3986 this.set("sample", self._parse_table_sample()) 3987 3988 if joins: 3989 for join in self._parse_joins(): 3990 this.append("joins", join) 3991 3992 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3993 this.set("ordinality", True) 3994 this.set("alias", self._parse_table_alias()) 3995 3996 return this 3997 3998 def _parse_version(self) -> t.Optional[exp.Version]: 3999 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4000 this = "TIMESTAMP" 4001 elif self._match(TokenType.VERSION_SNAPSHOT): 4002 this = "VERSION" 4003 else: 4004 return None 4005 4006 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4007 kind = self._prev.text.upper() 4008 start = self._parse_bitwise() 4009 self._match_texts(("TO", "AND")) 4010 end = self._parse_bitwise() 4011 expression: t.Optional[exp.Expression] = self.expression( 4012 exp.Tuple, expressions=[start, end] 4013 ) 4014 elif self._match_text_seq("CONTAINED", "IN"): 4015 kind = "CONTAINED IN" 4016 expression = self.expression( 4017 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4018 ) 4019 elif self._match(TokenType.ALL): 4020 kind = "ALL" 4021 expression = None 4022 else: 4023 self._match_text_seq("AS", "OF") 4024 kind = "AS OF" 4025 expression = self._parse_type() 4026 4027 return 
self.expression(exp.Version, this=this, expression=expression, kind=kind) 4028 4029 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4030 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4031 index = self._index 4032 historical_data = None 4033 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4034 this = self._prev.text.upper() 4035 kind = ( 4036 self._match(TokenType.L_PAREN) 4037 and self._match_texts(self.HISTORICAL_DATA_KIND) 4038 and self._prev.text.upper() 4039 ) 4040 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4041 4042 if expression: 4043 self._match_r_paren() 4044 historical_data = self.expression( 4045 exp.HistoricalData, this=this, kind=kind, expression=expression 4046 ) 4047 else: 4048 self._retreat(index) 4049 4050 return historical_data 4051 4052 def _parse_changes(self) -> t.Optional[exp.Changes]: 4053 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4054 return None 4055 4056 information = self._parse_var(any_token=True) 4057 self._match_r_paren() 4058 4059 return self.expression( 4060 exp.Changes, 4061 information=information, 4062 at_before=self._parse_historical_data(), 4063 end=self._parse_historical_data(), 4064 ) 4065 4066 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4067 if not self._match(TokenType.UNNEST): 4068 return None 4069 4070 expressions = self._parse_wrapped_csv(self._parse_equality) 4071 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4072 4073 alias = self._parse_table_alias() if with_alias else None 4074 4075 if alias: 4076 if self.dialect.UNNEST_COLUMN_ONLY: 4077 if alias.args.get("columns"): 4078 self.raise_error("Unexpected extra column alias in unnest.") 4079 4080 alias.set("columns", [alias.this]) 4081 alias.set("this", None) 4082 4083 columns = alias.args.get("columns") or [] 4084 if offset and len(expressions) < len(columns): 4085 offset = columns.pop() 4086 4087 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4088 self._match(TokenType.ALIAS) 4089 offset = self._parse_id_var( 4090 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4091 ) or exp.to_identifier("offset") 4092 4093 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4094 4095 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4096 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4097 if not is_derived and not ( 4098 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4099 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4100 ): 4101 return None 4102 4103 expressions = self._parse_csv(self._parse_value) 4104 alias = self._parse_table_alias() 4105 4106 if is_derived: 4107 self._match_r_paren() 4108 4109 return self.expression( 4110 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4111 ) 4112 4113 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4114 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4115 as_modifier and self._match_text_seq("USING", "SAMPLE") 4116 ): 4117 return None 4118 4119 bucket_numerator = None 4120 bucket_denominator = None 4121 bucket_field = None 4122 percent = None 4123 size = None 4124 seed = None 4125 4126 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4127 matched_l_paren = self._match(TokenType.L_PAREN) 4128 4129 if self.TABLESAMPLE_CSV: 4130 num = None 4131 expressions = self._parse_csv(self._parse_primary) 4132 else: 
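            # Non-CSV dialects: parse a single size/percent operand; whether it ends up
            # as `percent` or `size` is decided by the tokens matched in the branches
            # below (PERCENT, ROWS, or the dialect default).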
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
4323 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4324 continue 4325 4326 all_fields.append( 4327 [ 4328 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4329 for fld in pivot_field_expressions 4330 ] 4331 ) 4332 4333 if all_fields: 4334 if names: 4335 all_fields.append(names) 4336 4337 # Generate all possible combinations of the pivot columns 4338 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4339 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4340 for fld_parts_tuple in itertools.product(*all_fields): 4341 fld_parts = list(fld_parts_tuple) 4342 4343 if names and self.PREFIXED_PIVOT_COLUMNS: 4344 # Move the "name" to the front of the list 4345 fld_parts.insert(0, fld_parts.pop(-1)) 4346 4347 columns.append(exp.to_identifier("_".join(fld_parts))) 4348 4349 pivot.set("columns", columns) 4350 4351 return pivot 4352 4353 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4354 return [agg.alias for agg in aggregations if agg.alias] 4355 4356 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4357 if not skip_where_token and not self._match(TokenType.PREWHERE): 4358 return None 4359 4360 return self.expression( 4361 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4362 ) 4363 4364 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4365 if not skip_where_token and not self._match(TokenType.WHERE): 4366 return None 4367 4368 return self.expression( 4369 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4370 ) 4371 4372 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4373 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4374 return None 4375 4376 elements: t.Dict[str, t.Any] = defaultdict(list) 4377 4378 if self._match(TokenType.ALL): 4379 elements["all"] = True 4380 elif self._match(TokenType.DISTINCT): 4381 elements["all"] = False 4382 4383 while True: 4384 index = self._index 4385 4386 elements["expressions"].extend( 4387 self._parse_csv( 4388 lambda: None 4389 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4390 else self._parse_assignment() 4391 ) 4392 ) 4393 4394 before_with_index = self._index 4395 with_prefix = self._match(TokenType.WITH) 4396 4397 if self._match(TokenType.ROLLUP): 4398 elements["rollup"].append( 4399 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4400 ) 4401 elif self._match(TokenType.CUBE): 4402 elements["cube"].append( 4403 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4404 ) 4405 elif self._match(TokenType.GROUPING_SETS): 4406 elements["grouping_sets"].append( 4407 self.expression( 4408 exp.GroupingSets, 4409 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4410 ) 4411 ) 4412 elif self._match_text_seq("TOTALS"): 4413 elements["totals"] = True # type: ignore 4414 4415 if before_with_index <= self._index <= before_with_index + 1: 4416 self._retreat(before_with_index) 4417 break 4418 4419 if index == self._index: 4420 break 4421 4422 return self.expression(exp.Group, **elements) # type: ignore 4423 4424 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4425 return self.expression( 4426 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4427 ) 4428 4429 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4430 if 
self._match(TokenType.L_PAREN): 4431 grouping_set = self._parse_csv(self._parse_column) 4432 self._match_r_paren() 4433 return self.expression(exp.Tuple, expressions=grouping_set) 4434 4435 return self._parse_column() 4436 4437 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4438 if not skip_having_token and not self._match(TokenType.HAVING): 4439 return None 4440 return self.expression(exp.Having, this=self._parse_assignment()) 4441 4442 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4443 if not self._match(TokenType.QUALIFY): 4444 return None 4445 return self.expression(exp.Qualify, this=self._parse_assignment()) 4446 4447 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4448 if skip_start_token: 4449 start = None 4450 elif self._match(TokenType.START_WITH): 4451 start = self._parse_assignment() 4452 else: 4453 return None 4454 4455 self._match(TokenType.CONNECT_BY) 4456 nocycle = self._match_text_seq("NOCYCLE") 4457 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4458 exp.Prior, this=self._parse_bitwise() 4459 ) 4460 connect = self._parse_assignment() 4461 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4462 4463 if not start and self._match(TokenType.START_WITH): 4464 start = self._parse_assignment() 4465 4466 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4467 4468 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4469 this = self._parse_id_var(any_token=True) 4470 if self._match(TokenType.ALIAS): 4471 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4472 return this 4473 4474 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4475 if self._match_text_seq("INTERPOLATE"): 4476 return self._parse_wrapped_csv(self._parse_name_as_expression) 4477 return None 4478 4479 def _parse_order( 4480 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4481 ) -> t.Optional[exp.Expression]: 4482 siblings = None 4483 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4484 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4485 return this 4486 4487 siblings = True 4488 4489 return self.expression( 4490 exp.Order, 4491 this=this, 4492 expressions=self._parse_csv(self._parse_ordered), 4493 siblings=siblings, 4494 ) 4495 4496 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4497 if not self._match(token): 4498 return None 4499 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4500 4501 def _parse_ordered( 4502 self, parse_method: t.Optional[t.Callable] = None 4503 ) -> t.Optional[exp.Ordered]: 4504 this = parse_method() if parse_method else self._parse_assignment() 4505 if not this: 4506 return None 4507 4508 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4509 this = exp.var("ALL") 4510 4511 asc = self._match(TokenType.ASC) 4512 desc = self._match(TokenType.DESC) or (asc and False) 4513 4514 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4515 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4516 4517 nulls_first = is_nulls_first or False 4518 explicitly_null_ordered = is_nulls_first or is_nulls_last 4519 4520 if ( 4521 not explicitly_null_ordered 4522 and ( 4523 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4524 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4525 ) 4526 and self.dialect.NULL_ORDERING != "nulls_are_last" 4527 ): 4528 
nulls_first = True 4529 4530 if self._match_text_seq("WITH", "FILL"): 4531 with_fill = self.expression( 4532 exp.WithFill, 4533 **{ # type: ignore 4534 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4535 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4536 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4537 "interpolate": self._parse_interpolate(), 4538 }, 4539 ) 4540 else: 4541 with_fill = None 4542 4543 return self.expression( 4544 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4545 ) 4546 4547 def _parse_limit_options(self) -> exp.LimitOptions: 4548 percent = self._match(TokenType.PERCENT) 4549 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4550 self._match_text_seq("ONLY") 4551 with_ties = self._match_text_seq("WITH", "TIES") 4552 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4553 4554 def _parse_limit( 4555 self, 4556 this: t.Optional[exp.Expression] = None, 4557 top: bool = False, 4558 skip_limit_token: bool = False, 4559 ) -> t.Optional[exp.Expression]: 4560 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4561 comments = self._prev_comments 4562 if top: 4563 limit_paren = self._match(TokenType.L_PAREN) 4564 expression = self._parse_term() if limit_paren else self._parse_number() 4565 4566 if limit_paren: 4567 self._match_r_paren() 4568 4569 limit_options = self._parse_limit_options() 4570 else: 4571 limit_options = None 4572 expression = self._parse_term() 4573 4574 if self._match(TokenType.COMMA): 4575 offset = expression 4576 expression = self._parse_term() 4577 else: 4578 offset = None 4579 4580 limit_exp = self.expression( 4581 exp.Limit, 4582 this=this, 4583 expression=expression, 4584 offset=offset, 4585 comments=comments, 4586 limit_options=limit_options, 4587 expressions=self._parse_limit_by(), 4588 ) 4589 4590 return limit_exp 4591 4592 if self._match(TokenType.FETCH): 4593 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4594 direction = self._prev.text.upper() if direction else "FIRST" 4595 4596 count = self._parse_field(tokens=self.FETCH_TOKENS) 4597 4598 return self.expression( 4599 exp.Fetch, 4600 direction=direction, 4601 count=count, 4602 limit_options=self._parse_limit_options(), 4603 ) 4604 4605 return this 4606 4607 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4608 if not self._match(TokenType.OFFSET): 4609 return this 4610 4611 count = self._parse_term() 4612 self._match_set((TokenType.ROW, TokenType.ROWS)) 4613 4614 return self.expression( 4615 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4616 ) 4617 4618 def _can_parse_limit_or_offset(self) -> bool: 4619 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4620 return False 4621 4622 index = self._index 4623 result = bool( 4624 self._try_parse(self._parse_limit, retreat=True) 4625 or self._try_parse(self._parse_offset, retreat=True) 4626 ) 4627 self._retreat(index) 4628 return result 4629 4630 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4631 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4632 4633 def _parse_locks(self) -> t.List[exp.Lock]: 4634 locks = [] 4635 while True: 4636 if self._match_text_seq("FOR", "UPDATE"): 4637 update = True 4638 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4639 "LOCK", "IN", "SHARE", "MODE" 4640 ): 4641 update = False 4642 else: 4643 break 
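            # A lock clause was matched; pick up its optional OF <tables> list and
            # wait policy, e.g. (a sketch):
            #
            #   SELECT * FROM t FOR UPDATE OF t NOWAIT    -- wait=True
            #   SELECT * FROM t FOR SHARE SKIP LOCKED     -- wait=False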
4644 4645 expressions = None 4646 if self._match_text_seq("OF"): 4647 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4648 4649 wait: t.Optional[bool | exp.Expression] = None 4650 if self._match_text_seq("NOWAIT"): 4651 wait = True 4652 elif self._match_text_seq("WAIT"): 4653 wait = self._parse_primary() 4654 elif self._match_text_seq("SKIP", "LOCKED"): 4655 wait = False 4656 4657 locks.append( 4658 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4659 ) 4660 4661 return locks 4662 4663 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4664 start = self._index 4665 _, side_token, kind_token = self._parse_join_parts() 4666 4667 side = side_token.text if side_token else None 4668 kind = kind_token.text if kind_token else None 4669 4670 if not self._match_set(self.SET_OPERATIONS): 4671 self._retreat(start) 4672 return None 4673 4674 token_type = self._prev.token_type 4675 4676 if token_type == TokenType.UNION: 4677 operation: t.Type[exp.SetOperation] = exp.Union 4678 elif token_type == TokenType.EXCEPT: 4679 operation = exp.Except 4680 else: 4681 operation = exp.Intersect 4682 4683 comments = self._prev.comments 4684 4685 if self._match(TokenType.DISTINCT): 4686 distinct: t.Optional[bool] = True 4687 elif self._match(TokenType.ALL): 4688 distinct = False 4689 else: 4690 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4691 if distinct is None: 4692 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4693 4694 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4695 "STRICT", "CORRESPONDING" 4696 ) 4697 if self._match_text_seq("CORRESPONDING"): 4698 by_name = True 4699 if not side and not kind: 4700 kind = "INNER" 4701 4702 on_column_list = None 4703 if by_name and self._match_texts(("ON", "BY")): 4704 on_column_list = self._parse_wrapped_csv(self._parse_column) 4705 4706 expression = self._parse_select(nested=True, parse_set_operation=False) 4707 4708 return self.expression( 4709 operation, 4710 comments=comments, 4711 this=this, 4712 distinct=distinct, 4713 by_name=by_name, 4714 expression=expression, 4715 side=side, 4716 kind=kind, 4717 on=on_column_list, 4718 ) 4719 4720 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4721 while True: 4722 setop = self.parse_set_operation(this) 4723 if not setop: 4724 break 4725 this = setop 4726 4727 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4728 expression = this.expression 4729 4730 if expression: 4731 for arg in self.SET_OP_MODIFIERS: 4732 expr = expression.args.get(arg) 4733 if expr: 4734 this.set(arg, expr.pop()) 4735 4736 return this 4737 4738 def _parse_expression(self) -> t.Optional[exp.Expression]: 4739 return self._parse_alias(self._parse_assignment()) 4740 4741 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4742 this = self._parse_disjunction() 4743 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4744 # This allows us to parse <non-identifier token> := <expr> 4745 this = exp.column( 4746 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4747 ) 4748 4749 while self._match_set(self.ASSIGNMENT): 4750 if isinstance(this, exp.Column) and len(this.parts) == 1: 4751 this = this.this 4752 4753 this = self.expression( 4754 self.ASSIGNMENT[self._prev.token_type], 4755 this=this, 4756 comments=self._prev_comments, 4757 expression=self._parse_assignment(), 4758 ) 4759 4760 
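        # The recursive call above makes assignment right-associative, so an input
        # like `a := b := c` (a sketch) nests as a := (b := c).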
return this 4761 4762 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4763 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4764 4765 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4766 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4767 4768 def _parse_equality(self) -> t.Optional[exp.Expression]: 4769 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4770 4771 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4772 return self._parse_tokens(self._parse_range, self.COMPARISON) 4773 4774 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4775 this = this or self._parse_bitwise() 4776 negate = self._match(TokenType.NOT) 4777 4778 if self._match_set(self.RANGE_PARSERS): 4779 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4780 if not expression: 4781 return this 4782 4783 this = expression 4784 elif self._match(TokenType.ISNULL): 4785 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4786 4787 # Postgres supports ISNULL and NOTNULL for conditions. 4788 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4789 if self._match(TokenType.NOTNULL): 4790 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4791 this = self.expression(exp.Not, this=this) 4792 4793 if negate: 4794 this = self._negate_range(this) 4795 4796 if self._match(TokenType.IS): 4797 this = self._parse_is(this) 4798 4799 return this 4800 4801 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4802 if not this: 4803 return this 4804 4805 return self.expression(exp.Not, this=this) 4806 4807 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4808 index = self._index - 1 4809 negate = self._match(TokenType.NOT) 4810 4811 if self._match_text_seq("DISTINCT", "FROM"): 4812 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4813 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4814 4815 if self._match(TokenType.JSON): 4816 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4817 4818 if self._match_text_seq("WITH"): 4819 _with = True 4820 elif self._match_text_seq("WITHOUT"): 4821 _with = False 4822 else: 4823 _with = None 4824 4825 unique = self._match(TokenType.UNIQUE) 4826 self._match_text_seq("KEYS") 4827 expression: t.Optional[exp.Expression] = self.expression( 4828 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4829 ) 4830 else: 4831 expression = self._parse_primary() or self._parse_null() 4832 if not expression: 4833 self._retreat(index) 4834 return None 4835 4836 this = self.expression(exp.Is, this=this, expression=expression) 4837 return self.expression(exp.Not, this=this) if negate else this 4838 4839 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4840 unnest = self._parse_unnest(with_alias=False) 4841 if unnest: 4842 this = self.expression(exp.In, this=this, unnest=unnest) 4843 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4844 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4845 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4846 4847 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4848 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4849 else: 4850 this = self.expression(exp.In, this=this, expressions=expressions) 4851 4852 if 
matched_l_paren: 4853 self._match_r_paren(this) 4854 elif not self._match(TokenType.R_BRACKET, expression=this): 4855 self.raise_error("Expecting ]") 4856 else: 4857 this = self.expression(exp.In, this=this, field=self._parse_column()) 4858 4859 return this 4860 4861 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4862 low = self._parse_bitwise() 4863 self._match(TokenType.AND) 4864 high = self._parse_bitwise() 4865 return self.expression(exp.Between, this=this, low=low, high=high) 4866 4867 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4868 if not self._match(TokenType.ESCAPE): 4869 return this 4870 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4871 4872 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4873 index = self._index 4874 4875 if not self._match(TokenType.INTERVAL) and match_interval: 4876 return None 4877 4878 if self._match(TokenType.STRING, advance=False): 4879 this = self._parse_primary() 4880 else: 4881 this = self._parse_term() 4882 4883 if not this or ( 4884 isinstance(this, exp.Column) 4885 and not this.table 4886 and not this.this.quoted 4887 and this.name.upper() == "IS" 4888 ): 4889 self._retreat(index) 4890 return None 4891 4892 unit = self._parse_function() or ( 4893 not self._match(TokenType.ALIAS, advance=False) 4894 and self._parse_var(any_token=True, upper=True) 4895 ) 4896 4897 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4898 # each INTERVAL expression into this canonical form so it's easy to transpile 4899 if this and this.is_number: 4900 this = exp.Literal.string(this.to_py()) 4901 elif this and this.is_string: 4902 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4903 if parts and unit: 4904 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4905 unit = None 4906 self._retreat(self._index - 1) 4907 4908 if len(parts) == 1: 4909 this = exp.Literal.string(parts[0][0]) 4910 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4911 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4912 unit = self.expression( 4913 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4914 ) 4915 4916 interval = self.expression(exp.Interval, this=this, unit=unit) 4917 4918 index = self._index 4919 self._match(TokenType.PLUS) 4920 4921 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4922 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4923 return self.expression( 4924 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4925 ) 4926 4927 self._retreat(index) 4928 return interval 4929 4930 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4931 this = self._parse_term() 4932 4933 while True: 4934 if self._match_set(self.BITWISE): 4935 this = self.expression( 4936 self.BITWISE[self._prev.token_type], 4937 this=this, 4938 expression=self._parse_term(), 4939 ) 4940 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4941 this = self.expression( 4942 exp.DPipe, 4943 this=this, 4944 expression=self._parse_term(), 4945 safe=not self.dialect.STRICT_STRING_CONCAT, 4946 ) 4947 elif self._match(TokenType.DQMARK): 4948 this = self.expression( 4949 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4950 ) 4951 elif self._match_pair(TokenType.LT, TokenType.LT): 4952 this = self.expression( 4953 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4954 ) 4955 elif self._match_pair(TokenType.GT, TokenType.GT): 4956 this = self.expression( 4957 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4958 ) 4959 else: 4960 break 4961 4962 return this 4963 4964 def _parse_term(self) -> t.Optional[exp.Expression]: 4965 this = self._parse_factor() 4966 4967 while self._match_set(self.TERM): 4968 klass = self.TERM[self._prev.token_type] 4969 comments = self._prev_comments 4970 expression = self._parse_factor() 4971 4972 this = self.expression(klass, this=this, comments=comments, expression=expression) 4973 4974 if isinstance(this, exp.Collate): 4975 expr = this.expression 4976 4977 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4978 # fallback to Identifier / Var 4979 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4980 ident = expr.this 4981 if isinstance(ident, exp.Identifier): 4982 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4983 4984 return this 4985 4986 def _parse_factor(self) -> t.Optional[exp.Expression]: 4987 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4988 this = parse_method() 4989 4990 while self._match_set(self.FACTOR): 4991 klass = self.FACTOR[self._prev.token_type] 4992 comments = self._prev_comments 4993 expression = parse_method() 4994 4995 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4996 self._retreat(self._index - 1) 4997 return this 4998 4999 this = self.expression(klass, this=this, comments=comments, expression=expression) 5000 5001 if isinstance(this, exp.Div): 5002 this.args["typed"] = self.dialect.TYPED_DIVISION 5003 this.args["safe"] = self.dialect.SAFE_DIVISION 5004 5005 return this 5006 5007 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5008 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5009 5010 def _parse_unary(self) -> t.Optional[exp.Expression]: 5011 if self._match_set(self.UNARY_PARSERS): 5012 return self.UNARY_PARSERS[self._prev.token_type](self) 5013 return self._parse_at_time_zone(self._parse_type()) 5014 5015 def _parse_type( 5016 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5017 ) -> t.Optional[exp.Expression]: 5018 interval = parse_interval and self._parse_interval() 5019 if interval: 5020 return interval 5021 5022 index = self._index 5023 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5024 
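        # Illustrative inputs for the cases handled below (a sketch):
        #
        #   STRUCT<a INT, b STRING>(1, 'foo')  -- typed constructor -> exp.Cast
        #   DATE '2020-01-01'                  -- type keyword + literal
        #   DECIMAL(38, 0)                     -- plain parameterized type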
        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
5116 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5117 5118 if type_token == TokenType.OBJECT_IDENTIFIER: 5119 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5120 5121 # https://materialize.com/docs/sql/types/map/ 5122 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5123 key_type = self._parse_types( 5124 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5125 ) 5126 if not self._match(TokenType.FARROW): 5127 self._retreat(index) 5128 return None 5129 5130 value_type = self._parse_types( 5131 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5132 ) 5133 if not self._match(TokenType.R_BRACKET): 5134 self._retreat(index) 5135 return None 5136 5137 return exp.DataType( 5138 this=exp.DataType.Type.MAP, 5139 expressions=[key_type, value_type], 5140 nested=True, 5141 prefix=prefix, 5142 ) 5143 5144 nested = type_token in self.NESTED_TYPE_TOKENS 5145 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5146 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5147 expressions = None 5148 maybe_func = False 5149 5150 if self._match(TokenType.L_PAREN): 5151 if is_struct: 5152 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5153 elif nested: 5154 expressions = self._parse_csv( 5155 lambda: self._parse_types( 5156 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5157 ) 5158 ) 5159 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5160 this = expressions[0] 5161 this.set("nullable", True) 5162 self._match_r_paren() 5163 return this 5164 elif type_token in self.ENUM_TYPE_TOKENS: 5165 expressions = self._parse_csv(self._parse_equality) 5166 elif is_aggregate: 5167 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5168 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5169 ) 5170 if not func_or_ident: 5171 return None 5172 expressions = [func_or_ident] 5173 if self._match(TokenType.COMMA): 5174 expressions.extend( 5175 self._parse_csv( 5176 lambda: self._parse_types( 5177 check_func=check_func, 5178 schema=schema, 5179 allow_identifiers=allow_identifiers, 5180 ) 5181 ) 5182 ) 5183 else: 5184 expressions = self._parse_csv(self._parse_type_size) 5185 5186 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5187 if type_token == TokenType.VECTOR and len(expressions) == 2: 5188 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5189 5190 if not expressions or not self._match(TokenType.R_PAREN): 5191 self._retreat(index) 5192 return None 5193 5194 maybe_func = True 5195 5196 values: t.Optional[t.List[exp.Expression]] = None 5197 5198 if nested and self._match(TokenType.LT): 5199 if is_struct: 5200 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5201 else: 5202 expressions = self._parse_csv( 5203 lambda: self._parse_types( 5204 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5205 ) 5206 ) 5207 5208 if not self._match(TokenType.GT): 5209 self.raise_error("Expecting >") 5210 5211 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5212 values = self._parse_csv(self._parse_assignment) 5213 if not values and is_struct: 5214 values = None 5215 self._retreat(self._index - 1) 5216 else: 5217 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5218 5219 if type_token in self.TIMESTAMPS: 5220 if self._match_text_seq("WITH", "TIME", "ZONE"): 5221 maybe_func = False 5222 tz_type = ( 
5223 exp.DataType.Type.TIMETZ 5224 if type_token in self.TIMES 5225 else exp.DataType.Type.TIMESTAMPTZ 5226 ) 5227 this = exp.DataType(this=tz_type, expressions=expressions) 5228 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5229 maybe_func = False 5230 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5231 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5232 maybe_func = False 5233 elif type_token == TokenType.INTERVAL: 5234 unit = self._parse_var(upper=True) 5235 if unit: 5236 if self._match_text_seq("TO"): 5237 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5238 5239 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5240 else: 5241 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5242 elif type_token == TokenType.VOID: 5243 this = exp.DataType(this=exp.DataType.Type.NULL) 5244 5245 if maybe_func and check_func: 5246 index2 = self._index 5247 peek = self._parse_string() 5248 5249 if not peek: 5250 self._retreat(index) 5251 return None 5252 5253 self._retreat(index2) 5254 5255 if not this: 5256 if self._match_text_seq("UNSIGNED"): 5257 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5258 if not unsigned_type_token: 5259 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5260 5261 type_token = unsigned_type_token or type_token 5262 5263 this = exp.DataType( 5264 this=exp.DataType.Type[type_token.value], 5265 expressions=expressions, 5266 nested=nested, 5267 prefix=prefix, 5268 ) 5269 5270 # Empty arrays/structs are allowed 5271 if values is not None: 5272 cls = exp.Struct if is_struct else exp.Array 5273 this = exp.cast(cls(expressions=values), this, copy=False) 5274 5275 elif expressions: 5276 this.set("expressions", expressions) 5277 5278 # https://materialize.com/docs/sql/types/list/#type-name 5279 while self._match(TokenType.LIST): 5280 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5281 5282 index = self._index 5283 5284 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5285 matched_array = self._match(TokenType.ARRAY) 5286 5287 while self._curr: 5288 datatype_token = self._prev.token_type 5289 matched_l_bracket = self._match(TokenType.L_BRACKET) 5290 5291 if (not matched_l_bracket and not matched_array) or ( 5292 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5293 ): 5294 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5295 # not to be confused with the fixed size array parsing 5296 break 5297 5298 matched_array = False 5299 values = self._parse_csv(self._parse_assignment) or None 5300 if ( 5301 values 5302 and not schema 5303 and ( 5304 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5305 ) 5306 ): 5307 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5308 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5309 self._retreat(index) 5310 break 5311 5312 this = exp.DataType( 5313 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5314 ) 5315 self._match(TokenType.R_BRACKET) 5316 5317 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5318 converter = self.TYPE_CONVERTERS.get(this.this) 5319 if converter: 5320 this = converter(t.cast(exp.DataType, this)) 5321 5322 return this 5323 5324 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5325 index = self._index 5326 5327 if ( 5328 self._curr 5329 and self._next 5330 and self._curr.token_type in self.TYPE_TOKENS 5331 and self._next.token_type in self.TYPE_TOKENS 5332 ): 5333 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5334 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5335 this = self._parse_id_var() 5336 else: 5337 this = ( 5338 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5339 or self._parse_id_var() 5340 ) 5341 5342 self._match(TokenType.COLON) 5343 5344 if ( 5345 type_required 5346 and not isinstance(this, exp.DataType) 5347 and not self._match_set(self.TYPE_TOKENS, advance=False) 5348 ): 5349 self._retreat(index) 5350 return self._parse_types() 5351 5352 return self._parse_column_def(this) 5353 5354 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5355 if not self._match_text_seq("AT", "TIME", "ZONE"): 5356 return this 5357 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5358 5359 def _parse_column(self) -> t.Optional[exp.Expression]: 5360 this = self._parse_column_reference() 5361 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5362 5363 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5364 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5365 5366 return column 5367 5368 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5369 this = self._parse_field() 5370 if ( 5371 not this 5372 and self._match(TokenType.VALUES, advance=False) 5373 and self.VALUES_FOLLOWED_BY_PAREN 5374 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5375 ): 5376 this = self._parse_id_var() 5377 5378 if isinstance(this, exp.Identifier): 5379 # We bubble up comments from the Identifier to the Column 5380 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5381 5382 return this 5383 5384 def _parse_colon_as_variant_extract( 5385 self, this: t.Optional[exp.Expression] 5386 ) -> t.Optional[exp.Expression]: 5387 casts = [] 5388 json_path = [] 5389 escape = None 5390 5391 while self._match(TokenType.COLON): 5392 start_index = self._index 5393 5394 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5395 path = self._parse_column_ops( 5396 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5397 ) 5398 5399 # The cast :: operator has a lower precedence than the extraction operator :, so 5400 # we rearrange the AST appropriately to avoid casting the JSON path 5401 while isinstance(path, exp.Cast): 5402 casts.append(path.to) 5403 path = path.this 5404 5405 if casts: 5406 dcolon_offset = next( 5407 i 5408 for i, t in enumerate(self._tokens[start_index:]) 5409 if t.token_type == TokenType.DCOLON 
5410 ) 5411 end_token = self._tokens[start_index + dcolon_offset - 1] 5412 else: 5413 end_token = self._prev 5414 5415 if path: 5416 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5417 # it'll roundtrip to a string literal in GET_PATH 5418 if isinstance(path, exp.Identifier) and path.quoted: 5419 escape = True 5420 5421 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5422 5423 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5424 # Databricks transforms it back to the colon/dot notation 5425 if json_path: 5426 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5427 5428 if json_path_expr: 5429 json_path_expr.set("escape", escape) 5430 5431 this = self.expression( 5432 exp.JSONExtract, 5433 this=this, 5434 expression=json_path_expr, 5435 variant_extract=True, 5436 ) 5437 5438 while casts: 5439 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5440 5441 return this 5442 5443 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5444 return self._parse_types() 5445 5446 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5447 this = self._parse_bracket(this) 5448 5449 while self._match_set(self.COLUMN_OPERATORS): 5450 op_token = self._prev.token_type 5451 op = self.COLUMN_OPERATORS.get(op_token) 5452 5453 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5454 field = self._parse_dcolon() 5455 if not field: 5456 self.raise_error("Expected type") 5457 elif op and self._curr: 5458 field = self._parse_column_reference() or self._parse_bracket() 5459 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5460 field = self._parse_column_ops(field) 5461 else: 5462 field = self._parse_field(any_token=True, anonymous_func=True) 5463 5464 if isinstance(field, (exp.Func, exp.Window)) and this: 5465 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5466 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5467 this = exp.replace_tree( 5468 this, 5469 lambda n: ( 5470 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5471 if n.table 5472 else n.this 5473 ) 5474 if isinstance(n, exp.Column) 5475 else n, 5476 ) 5477 5478 if op: 5479 this = op(self, this, field) 5480 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5481 this = self.expression( 5482 exp.Column, 5483 comments=this.comments, 5484 this=field, 5485 table=this.this, 5486 db=this.args.get("table"), 5487 catalog=this.args.get("db"), 5488 ) 5489 elif isinstance(field, exp.Window): 5490 # Move the exp.Dot's to the window's function 5491 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5492 field.set("this", window_func) 5493 this = field 5494 else: 5495 this = self.expression(exp.Dot, this=this, expression=field) 5496 5497 if field and field.comments: 5498 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5499 5500 this = self._parse_bracket(this) 5501 5502 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5503 5504 def _parse_primary(self) -> t.Optional[exp.Expression]: 5505 if self._match_set(self.PRIMARY_PARSERS): 5506 token_type = self._prev.token_type 5507 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5508 5509 if token_type == TokenType.STRING: 5510 expressions = [primary] 5511 while self._match(TokenType.STRING): 5512 expressions.append(exp.Literal.string(self._prev.text)) 5513 5514 if len(expressions) > 1: 5515 return self.expression(exp.Concat, expressions=expressions) 5516 5517 return primary 5518 5519 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5520 return exp.Literal.number(f"0.{self._prev.text}") 5521 5522 if self._match(TokenType.L_PAREN): 5523 comments = self._prev_comments 5524 query = self._parse_select() 5525 5526 if query: 5527 expressions = [query] 5528 else: 5529 expressions = self._parse_expressions() 5530 5531 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5532 5533 if not this and self._match(TokenType.R_PAREN, advance=False): 5534 this = self.expression(exp.Tuple) 5535 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5536 this = self._parse_subquery(this=this, parse_alias=False) 5537 elif isinstance(this, exp.Subquery): 5538 this = self._parse_subquery( 5539 this=self._parse_set_operations(this), parse_alias=False 5540 ) 5541 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5542 this = self.expression(exp.Tuple, expressions=expressions) 5543 else: 5544 this = self.expression(exp.Paren, this=this) 5545 5546 if this: 5547 this.add_comments(comments) 5548 5549 self._match_r_paren(expression=this) 5550 return this 5551 5552 return None 5553 5554 def _parse_field( 5555 self, 5556 any_token: bool = False, 5557 tokens: t.Optional[t.Collection[TokenType]] = None, 5558 anonymous_func: bool = False, 5559 ) -> t.Optional[exp.Expression]: 5560 if anonymous_func: 5561 field = ( 5562 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5563 or self._parse_primary() 5564 ) 5565 else: 5566 field = self._parse_primary() or self._parse_function( 5567 anonymous=anonymous_func, any_token=any_token 5568 ) 5569 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5570 5571 def _parse_function( 5572 self, 5573 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5574 anonymous: bool = False, 5575 optional_parens: 
bool = True, 5576 any_token: bool = False, 5577 ) -> t.Optional[exp.Expression]: 5578 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5579 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5580 fn_syntax = False 5581 if ( 5582 self._match(TokenType.L_BRACE, advance=False) 5583 and self._next 5584 and self._next.text.upper() == "FN" 5585 ): 5586 self._advance(2) 5587 fn_syntax = True 5588 5589 func = self._parse_function_call( 5590 functions=functions, 5591 anonymous=anonymous, 5592 optional_parens=optional_parens, 5593 any_token=any_token, 5594 ) 5595 5596 if fn_syntax: 5597 self._match(TokenType.R_BRACE) 5598 5599 return func 5600 5601 def _parse_function_call( 5602 self, 5603 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5604 anonymous: bool = False, 5605 optional_parens: bool = True, 5606 any_token: bool = False, 5607 ) -> t.Optional[exp.Expression]: 5608 if not self._curr: 5609 return None 5610 5611 comments = self._curr.comments 5612 token_type = self._curr.token_type 5613 this = self._curr.text 5614 upper = this.upper() 5615 5616 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5617 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5618 self._advance() 5619 return self._parse_window(parser(self)) 5620 5621 if not self._next or self._next.token_type != TokenType.L_PAREN: 5622 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5623 self._advance() 5624 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5625 5626 return None 5627 5628 if any_token: 5629 if token_type in self.RESERVED_TOKENS: 5630 return None 5631 elif token_type not in self.FUNC_TOKENS: 5632 return None 5633 5634 self._advance(2) 5635 5636 parser = self.FUNCTION_PARSERS.get(upper) 5637 if parser and not anonymous: 5638 this = parser(self) 5639 else: 5640 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5641 5642 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5643 this = self.expression( 5644 subquery_predicate, comments=comments, this=self._parse_select() 5645 ) 5646 self._match_r_paren() 5647 return this 5648 5649 if functions is None: 5650 functions = self.FUNCTIONS 5651 5652 function = functions.get(upper) 5653 known_function = function and not anonymous 5654 5655 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5656 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5657 5658 post_func_comments = self._curr and self._curr.comments 5659 if known_function and post_func_comments: 5660 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5661 # call we'll construct it as exp.Anonymous, even if it's "known" 5662 if any( 5663 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5664 for comment in post_func_comments 5665 ): 5666 known_function = False 5667 5668 if alias and known_function: 5669 args = self._kv_to_prop_eq(args) 5670 5671 if known_function: 5672 func_builder = t.cast(t.Callable, function) 5673 5674 if "dialect" in func_builder.__code__.co_varnames: 5675 func = func_builder(args, dialect=self.dialect) 5676 else: 5677 func = func_builder(args) 5678 5679 func = self.validate_expression(func, args) 5680 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5681 func.meta["name"] = this 5682 5683 this = func 5684 else: 5685 if token_type == TokenType.IDENTIFIER: 5686 this = exp.Identifier(this=this, quoted=True) 5687 this = self.expression(exp.Anonymous, this=this, expressions=args) 5688 
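# Editor's note (illustrative sketch, not part of the original source): at this point
# `this` is either a typed node built by a registered builder (a "known" function) or
# a generic exp.Anonymous, and the "/* sqlglot.anonymous */" comment handled above is
# an escape hatch that demotes a known function on purpose. Assuming only the public API:
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("SELECT ABS(x)").selects[0]).__name__
#     'Abs'
#     >>> type(sqlglot.parse_one("SELECT MY_UDF(x)").selects[0]).__name__
#     'Anonymous'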
5689 if isinstance(this, exp.Expression): 5690 this.add_comments(comments) 5691 5692 self._match_r_paren(this) 5693 return self._parse_window(this) 5694 5695 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5696 return expression 5697 5698 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5699 transformed = [] 5700 5701 for index, e in enumerate(expressions): 5702 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5703 if isinstance(e, exp.Alias): 5704 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5705 5706 if not isinstance(e, exp.PropertyEQ): 5707 e = self.expression( 5708 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5709 ) 5710 5711 if isinstance(e.this, exp.Column): 5712 e.this.replace(e.this.this) 5713 else: 5714 e = self._to_prop_eq(e, index) 5715 5716 transformed.append(e) 5717 5718 return transformed 5719 5720 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5721 return self._parse_statement() 5722 5723 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5724 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5725 5726 def _parse_user_defined_function( 5727 self, kind: t.Optional[TokenType] = None 5728 ) -> t.Optional[exp.Expression]: 5729 this = self._parse_table_parts(schema=True) 5730 5731 if not self._match(TokenType.L_PAREN): 5732 return this 5733 5734 expressions = self._parse_csv(self._parse_function_parameter) 5735 self._match_r_paren() 5736 return self.expression( 5737 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5738 ) 5739 5740 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5741 literal = self._parse_primary() 5742 if literal: 5743 return self.expression(exp.Introducer, this=token.text, expression=literal) 5744 5745 return self.expression(exp.Identifier, this=token.text) 5746 5747 def _parse_session_parameter(self) -> exp.SessionParameter: 5748 kind = None 5749 this = self._parse_id_var() or self._parse_primary() 5750 5751 if this and self._match(TokenType.DOT): 5752 kind = this.name 5753 this = self._parse_var() or self._parse_primary() 5754 5755 return self.expression(exp.SessionParameter, this=this, kind=kind) 5756 5757 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5758 return self._parse_id_var() 5759 5760 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5761 index = self._index 5762 5763 if self._match(TokenType.L_PAREN): 5764 expressions = t.cast( 5765 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5766 ) 5767 5768 if not self._match(TokenType.R_PAREN): 5769 self._retreat(index) 5770 else: 5771 expressions = [self._parse_lambda_arg()] 5772 5773 if self._match_set(self.LAMBDAS): 5774 return self.LAMBDAS[self._prev.token_type](self, expressions) 5775 5776 self._retreat(index) 5777 5778 this: t.Optional[exp.Expression] 5779 5780 if self._match(TokenType.DISTINCT): 5781 this = self.expression( 5782 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5783 ) 5784 else: 5785 this = self._parse_select_or_expression(alias=alias) 5786 5787 return self._parse_limit( 5788 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5789 ) 5790 5791 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5792 index = self._index 5793 if not 
self._match(TokenType.L_PAREN): 5794 return this 5795 5796 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5797 # expr can be of both types 5798 if self._match_set(self.SELECT_START_TOKENS): 5799 self._retreat(index) 5800 return this 5801 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5802 self._match_r_paren() 5803 return self.expression(exp.Schema, this=this, expressions=args) 5804 5805 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5806 return self._parse_column_def(self._parse_field(any_token=True)) 5807 5808 def _parse_column_def( 5809 self, this: t.Optional[exp.Expression], computed_column: bool = True 5810 ) -> t.Optional[exp.Expression]: 5811 # column defs are not really columns, they're identifiers 5812 if isinstance(this, exp.Column): 5813 this = this.this 5814 5815 if not computed_column: 5816 self._match(TokenType.ALIAS) 5817 5818 kind = self._parse_types(schema=True) 5819 5820 if self._match_text_seq("FOR", "ORDINALITY"): 5821 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5822 5823 constraints: t.List[exp.Expression] = [] 5824 5825 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5826 ("ALIAS", "MATERIALIZED") 5827 ): 5828 persisted = self._prev.text.upper() == "MATERIALIZED" 5829 constraint_kind = exp.ComputedColumnConstraint( 5830 this=self._parse_assignment(), 5831 persisted=persisted or self._match_text_seq("PERSISTED"), 5832 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5833 ) 5834 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5835 elif ( 5836 kind 5837 and self._match(TokenType.ALIAS, advance=False) 5838 and ( 5839 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5840 or (self._next and self._next.token_type == TokenType.L_PAREN) 5841 ) 5842 ): 5843 self._advance() 5844 constraints.append( 5845 self.expression( 5846 exp.ColumnConstraint, 5847 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5848 ) 5849 ) 5850 5851 while True: 5852 constraint = self._parse_column_constraint() 5853 if not constraint: 5854 break 5855 constraints.append(constraint) 5856 5857 if not kind and not constraints: 5858 return this 5859 5860 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5861 5862 def _parse_auto_increment( 5863 self, 5864 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5865 start = None 5866 increment = None 5867 5868 if self._match(TokenType.L_PAREN, advance=False): 5869 args = self._parse_wrapped_csv(self._parse_bitwise) 5870 start = seq_get(args, 0) 5871 increment = seq_get(args, 1) 5872 elif self._match_text_seq("START"): 5873 start = self._parse_bitwise() 5874 self._match_text_seq("INCREMENT") 5875 increment = self._parse_bitwise() 5876 5877 if start and increment: 5878 return exp.GeneratedAsIdentityColumnConstraint( 5879 start=start, increment=increment, this=False 5880 ) 5881 5882 return exp.AutoIncrementColumnConstraint() 5883 5884 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5885 if not self._match_text_seq("REFRESH"): 5886 self._retreat(self._index - 1) 5887 return None 5888 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5889 5890 def _parse_compress(self) -> exp.CompressColumnConstraint: 5891 if self._match(TokenType.L_PAREN, advance=False): 5892 return self.expression( 5893 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5894 
) 5895 5896 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5897 5898 def _parse_generated_as_identity( 5899 self, 5900 ) -> ( 5901 exp.GeneratedAsIdentityColumnConstraint 5902 | exp.ComputedColumnConstraint 5903 | exp.GeneratedAsRowColumnConstraint 5904 ): 5905 if self._match_text_seq("BY", "DEFAULT"): 5906 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5907 this = self.expression( 5908 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5909 ) 5910 else: 5911 self._match_text_seq("ALWAYS") 5912 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5913 5914 self._match(TokenType.ALIAS) 5915 5916 if self._match_text_seq("ROW"): 5917 start = self._match_text_seq("START") 5918 if not start: 5919 self._match(TokenType.END) 5920 hidden = self._match_text_seq("HIDDEN") 5921 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5922 5923 identity = self._match_text_seq("IDENTITY") 5924 5925 if self._match(TokenType.L_PAREN): 5926 if self._match(TokenType.START_WITH): 5927 this.set("start", self._parse_bitwise()) 5928 if self._match_text_seq("INCREMENT", "BY"): 5929 this.set("increment", self._parse_bitwise()) 5930 if self._match_text_seq("MINVALUE"): 5931 this.set("minvalue", self._parse_bitwise()) 5932 if self._match_text_seq("MAXVALUE"): 5933 this.set("maxvalue", self._parse_bitwise()) 5934 5935 if self._match_text_seq("CYCLE"): 5936 this.set("cycle", True) 5937 elif self._match_text_seq("NO", "CYCLE"): 5938 this.set("cycle", False) 5939 5940 if not identity: 5941 this.set("expression", self._parse_range()) 5942 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5943 args = self._parse_csv(self._parse_bitwise) 5944 this.set("start", seq_get(args, 0)) 5945 this.set("increment", seq_get(args, 1)) 5946 5947 self._match_r_paren() 5948 5949 return this 5950 5951 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5952 self._match_text_seq("LENGTH") 5953 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5954 5955 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5956 if self._match_text_seq("NULL"): 5957 return self.expression(exp.NotNullColumnConstraint) 5958 if self._match_text_seq("CASESPECIFIC"): 5959 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5960 if self._match_text_seq("FOR", "REPLICATION"): 5961 return self.expression(exp.NotForReplicationColumnConstraint) 5962 5963 # Unconsume the `NOT` token 5964 self._retreat(self._index - 1) 5965 return None 5966 5967 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5968 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5969 5970 procedure_option_follows = ( 5971 self._match(TokenType.WITH, advance=False) 5972 and self._next 5973 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5974 ) 5975 5976 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5977 return self.expression( 5978 exp.ColumnConstraint, 5979 this=this, 5980 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5981 ) 5982 5983 return this 5984 5985 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5986 if not self._match(TokenType.CONSTRAINT): 5987 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5988 5989 return self.expression( 5990 exp.Constraint, 5991 this=self._parse_id_var(), 5992 expressions=self._parse_unnamed_constraints(), 5993 ) 5994 5995 def 
_parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5996 constraints = [] 5997 while True: 5998 constraint = self._parse_unnamed_constraint() or self._parse_function() 5999 if not constraint: 6000 break 6001 constraints.append(constraint) 6002 6003 return constraints 6004 6005 def _parse_unnamed_constraint( 6006 self, constraints: t.Optional[t.Collection[str]] = None 6007 ) -> t.Optional[exp.Expression]: 6008 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6009 constraints or self.CONSTRAINT_PARSERS 6010 ): 6011 return None 6012 6013 constraint = self._prev.text.upper() 6014 if constraint not in self.CONSTRAINT_PARSERS: 6015 self.raise_error(f"No parser found for schema constraint {constraint}.") 6016 6017 return self.CONSTRAINT_PARSERS[constraint](self) 6018 6019 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6020 return self._parse_id_var(any_token=False) 6021 6022 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6023 self._match_text_seq("KEY") 6024 return self.expression( 6025 exp.UniqueColumnConstraint, 6026 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6027 this=self._parse_schema(self._parse_unique_key()), 6028 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6029 on_conflict=self._parse_on_conflict(), 6030 options=self._parse_key_constraint_options(), 6031 ) 6032 6033 def _parse_key_constraint_options(self) -> t.List[str]: 6034 options = [] 6035 while True: 6036 if not self._curr: 6037 break 6038 6039 if self._match(TokenType.ON): 6040 action = None 6041 on = self._advance_any() and self._prev.text 6042 6043 if self._match_text_seq("NO", "ACTION"): 6044 action = "NO ACTION" 6045 elif self._match_text_seq("CASCADE"): 6046 action = "CASCADE" 6047 elif self._match_text_seq("RESTRICT"): 6048 action = "RESTRICT" 6049 elif self._match_pair(TokenType.SET, TokenType.NULL): 6050 action = "SET NULL" 6051 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6052 action = "SET DEFAULT" 6053 else: 6054 self.raise_error("Invalid key constraint") 6055 6056 options.append(f"ON {on} {action}") 6057 else: 6058 var = self._parse_var_from_options( 6059 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6060 ) 6061 if not var: 6062 break 6063 options.append(var.name) 6064 6065 return options 6066 6067 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6068 if match and not self._match(TokenType.REFERENCES): 6069 return None 6070 6071 expressions = None 6072 this = self._parse_table(schema=True) 6073 options = self._parse_key_constraint_options() 6074 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6075 6076 def _parse_foreign_key(self) -> exp.ForeignKey: 6077 expressions = self._parse_wrapped_id_vars() 6078 reference = self._parse_references() 6079 on_options = {} 6080 6081 while self._match(TokenType.ON): 6082 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6083 self.raise_error("Expected DELETE or UPDATE") 6084 6085 kind = self._prev.text.lower() 6086 6087 if self._match_text_seq("NO", "ACTION"): 6088 action = "NO ACTION" 6089 elif self._match(TokenType.SET): 6090 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6091 action = "SET " + self._prev.text.upper() 6092 else: 6093 self._advance() 6094 action = self._prev.text.upper() 6095 6096 on_options[kind] = action 6097 6098 return self.expression( 6099 exp.ForeignKey, 6100 expressions=expressions, 6101 reference=reference, 6102 
options=self._parse_key_constraint_options(), 6103 **on_options, # type: ignore 6104 ) 6105 6106 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6107 return self._parse_ordered() or self._parse_field() 6108 6109 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6110 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6111 self._retreat(self._index - 1) 6112 return None 6113 6114 id_vars = self._parse_wrapped_id_vars() 6115 return self.expression( 6116 exp.PeriodForSystemTimeConstraint, 6117 this=seq_get(id_vars, 0), 6118 expression=seq_get(id_vars, 1), 6119 ) 6120 6121 def _parse_primary_key( 6122 self, wrapped_optional: bool = False, in_props: bool = False 6123 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6124 desc = ( 6125 self._match_set((TokenType.ASC, TokenType.DESC)) 6126 and self._prev.token_type == TokenType.DESC 6127 ) 6128 6129 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6130 return self.expression( 6131 exp.PrimaryKeyColumnConstraint, 6132 desc=desc, 6133 options=self._parse_key_constraint_options(), 6134 ) 6135 6136 expressions = self._parse_wrapped_csv( 6137 self._parse_primary_key_part, optional=wrapped_optional 6138 ) 6139 options = self._parse_key_constraint_options() 6140 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6141 6142 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6143 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6144 6145 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6146 """ 6147 Parses a datetime column in ODBC format. We parse the column into the corresponding 6148 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6149 same as we did for `DATE('yyyy-mm-dd')`. 
6150 6151 Reference: 6152 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6153 """ 6154 self._match(TokenType.VAR) 6155 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6156 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6157 if not self._match(TokenType.R_BRACE): 6158 self.raise_error("Expected }") 6159 return expression 6160 6161 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6162 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6163 return this 6164 6165 bracket_kind = self._prev.token_type 6166 if ( 6167 bracket_kind == TokenType.L_BRACE 6168 and self._curr 6169 and self._curr.token_type == TokenType.VAR 6170 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6171 ): 6172 return self._parse_odbc_datetime_literal() 6173 6174 expressions = self._parse_csv( 6175 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6176 ) 6177 6178 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6179 self.raise_error("Expected ]") 6180 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6181 self.raise_error("Expected }") 6182 6183 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6184 if bracket_kind == TokenType.L_BRACE: 6185 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6186 elif not this: 6187 this = build_array_constructor( 6188 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6189 ) 6190 else: 6191 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6192 if constructor_type: 6193 return build_array_constructor( 6194 constructor_type, 6195 args=expressions, 6196 bracket_kind=bracket_kind, 6197 dialect=self.dialect, 6198 ) 6199 6200 expressions = apply_index_offset( 6201 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6202 ) 6203 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6204 6205 self._add_comments(this) 6206 return self._parse_bracket(this) 6207 6208 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6209 if self._match(TokenType.COLON): 6210 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6211 return this 6212 6213 def _parse_case(self) -> t.Optional[exp.Expression]: 6214 ifs = [] 6215 default = None 6216 6217 comments = self._prev_comments 6218 expression = self._parse_assignment() 6219 6220 while self._match(TokenType.WHEN): 6221 this = self._parse_assignment() 6222 self._match(TokenType.THEN) 6223 then = self._parse_assignment() 6224 ifs.append(self.expression(exp.If, this=this, true=then)) 6225 6226 if self._match(TokenType.ELSE): 6227 default = self._parse_assignment() 6228 6229 if not self._match(TokenType.END): 6230 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6231 default = exp.column("interval") 6232 else: 6233 self.raise_error("Expected END after CASE", self._prev) 6234 6235 return self.expression( 6236 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6237 ) 6238 6239 def _parse_if(self) -> t.Optional[exp.Expression]: 6240 if self._match(TokenType.L_PAREN): 6241 args = self._parse_csv( 6242 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6243 ) 6244 this = self.validate_expression(exp.If.from_arg_list(args), args) 6245 self._match_r_paren() 6246 
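# Editor's note (illustrative sketch, not part of the original source): the branch
# above parses the function form IF(<cond>, <true>[, <false>]); the else-branch below
# parses the keyword form IF <cond> THEN <true> [ELSE <false>] END. Both normalize to
# exp.If. Assuming only the public API:
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("SELECT IF(x > 0, 1, 0)").selects[0]).__name__
#     'If'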
else: 6247 index = self._index - 1 6248 6249 if self.NO_PAREN_IF_COMMANDS and index == 0: 6250 return self._parse_as_command(self._prev) 6251 6252 condition = self._parse_assignment() 6253 6254 if not condition: 6255 self._retreat(index) 6256 return None 6257 6258 self._match(TokenType.THEN) 6259 true = self._parse_assignment() 6260 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6261 self._match(TokenType.END) 6262 this = self.expression(exp.If, this=condition, true=true, false=false) 6263 6264 return this 6265 6266 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6267 if not self._match_text_seq("VALUE", "FOR"): 6268 self._retreat(self._index - 1) 6269 return None 6270 6271 return self.expression( 6272 exp.NextValueFor, 6273 this=self._parse_column(), 6274 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6275 ) 6276 6277 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6278 this = self._parse_function() or self._parse_var_or_string(upper=True) 6279 6280 if self._match(TokenType.FROM): 6281 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6282 6283 if not self._match(TokenType.COMMA): 6284 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6285 6286 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6287 6288 def _parse_gap_fill(self) -> exp.GapFill: 6289 self._match(TokenType.TABLE) 6290 this = self._parse_table() 6291 6292 self._match(TokenType.COMMA) 6293 args = [this, *self._parse_csv(self._parse_lambda)] 6294 6295 gap_fill = exp.GapFill.from_arg_list(args) 6296 return self.validate_expression(gap_fill, args) 6297 6298 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6299 this = self._parse_assignment() 6300 6301 if not self._match(TokenType.ALIAS): 6302 if self._match(TokenType.COMMA): 6303 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6304 6305 self.raise_error("Expected AS after CAST") 6306 6307 fmt = None 6308 to = self._parse_types() 6309 6310 default = self._match(TokenType.DEFAULT) 6311 if default: 6312 default = self._parse_bitwise() 6313 self._match_text_seq("ON", "CONVERSION", "ERROR") 6314 6315 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6316 fmt_string = self._parse_string() 6317 fmt = self._parse_at_time_zone(fmt_string) 6318 6319 if not to: 6320 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6321 if to.this in exp.DataType.TEMPORAL_TYPES: 6322 this = self.expression( 6323 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6324 this=this, 6325 format=exp.Literal.string( 6326 format_time( 6327 fmt_string.this if fmt_string else "", 6328 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6329 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6330 ) 6331 ), 6332 safe=safe, 6333 ) 6334 6335 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6336 this.set("zone", fmt.args["zone"]) 6337 return this 6338 elif not to: 6339 self.raise_error("Expected TYPE after CAST") 6340 elif isinstance(to, exp.Identifier): 6341 to = exp.DataType.build(to.name, udt=True) 6342 elif to.this == exp.DataType.Type.CHAR: 6343 if self._match(TokenType.CHARACTER_SET): 6344 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6345 6346 return self.expression( 6347 exp.Cast if strict else exp.TryCast, 6348 this=this, 6349 to=to, 6350 format=fmt, 6351 safe=safe, 6352 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6353 default=default, 6354 ) 6355 6356 def _parse_string_agg(self) -> exp.GroupConcat: 6357 if self._match(TokenType.DISTINCT): 6358 args: t.List[t.Optional[exp.Expression]] = [ 6359 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6360 ] 6361 if self._match(TokenType.COMMA): 6362 args.extend(self._parse_csv(self._parse_assignment)) 6363 else: 6364 args = self._parse_csv(self._parse_assignment) # type: ignore 6365 6366 if self._match_text_seq("ON", "OVERFLOW"): 6367 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6368 if self._match_text_seq("ERROR"): 6369 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6370 else: 6371 self._match_text_seq("TRUNCATE") 6372 on_overflow = self.expression( 6373 exp.OverflowTruncateBehavior, 6374 this=self._parse_string(), 6375 with_count=( 6376 self._match_text_seq("WITH", "COUNT") 6377 or not self._match_text_seq("WITHOUT", "COUNT") 6378 ), 6379 ) 6380 else: 6381 on_overflow = None 6382 6383 index = self._index 6384 if not self._match(TokenType.R_PAREN) and args: 6385 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6386 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6387 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6388 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6389 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6390 6391 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6392 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6393 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
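# Editor's note (illustrative sketch, not part of the original source): the upshot of
# the canonicalization described above is that the STRING_AGG / GROUP_CONCAT /
# LISTAGG ... WITHIN GROUP family all land on exp.GroupConcat, which is what makes
# them transpilable to one another. Assuming only the public API:
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("STRING_AGG(x, ',')", read="postgres")).__name__
#     'GroupConcat'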
6394 if not self._match_text_seq("WITHIN", "GROUP"): 6395 self._retreat(index) 6396 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6397 6398 # The corresponding match_r_paren will be called in parse_function (caller) 6399 self._match_l_paren() 6400 6401 return self.expression( 6402 exp.GroupConcat, 6403 this=self._parse_order(this=seq_get(args, 0)), 6404 separator=seq_get(args, 1), 6405 on_overflow=on_overflow, 6406 ) 6407 6408 def _parse_convert( 6409 self, strict: bool, safe: t.Optional[bool] = None 6410 ) -> t.Optional[exp.Expression]: 6411 this = self._parse_bitwise() 6412 6413 if self._match(TokenType.USING): 6414 to: t.Optional[exp.Expression] = self.expression( 6415 exp.CharacterSet, this=self._parse_var() 6416 ) 6417 elif self._match(TokenType.COMMA): 6418 to = self._parse_types() 6419 else: 6420 to = None 6421 6422 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6423 6424 def _parse_xml_table(self) -> exp.XMLTable: 6425 namespaces = None 6426 passing = None 6427 columns = None 6428 6429 if self._match_text_seq("XMLNAMESPACES", "("): 6430 namespaces = self._parse_xml_namespace() 6431 self._match_text_seq(")", ",") 6432 6433 this = self._parse_string() 6434 6435 if self._match_text_seq("PASSING"): 6436 # The BY VALUE keywords are optional and are provided for semantic clarity 6437 self._match_text_seq("BY", "VALUE") 6438 passing = self._parse_csv(self._parse_column) 6439 6440 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6441 6442 if self._match_text_seq("COLUMNS"): 6443 columns = self._parse_csv(self._parse_field_def) 6444 6445 return self.expression( 6446 exp.XMLTable, 6447 this=this, 6448 namespaces=namespaces, 6449 passing=passing, 6450 columns=columns, 6451 by_ref=by_ref, 6452 ) 6453 6454 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6455 namespaces = [] 6456 6457 while True: 6458 if self._match(TokenType.DEFAULT): 6459 uri = self._parse_string() 6460 else: 6461 uri = self._parse_alias(self._parse_string()) 6462 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6463 if not self._match(TokenType.COMMA): 6464 break 6465 6466 return namespaces 6467 6468 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6469 """ 6470 There are generally two variants of the DECODE function: 6471 6472 - DECODE(bin, charset) 6473 - DECODE(expression, search, result [, search, result] ... [, default]) 6474 6475 The second variant will always be parsed into a CASE expression. Note that NULL 6476 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6477 instead of relying on pattern matching. 
6478 """ 6479 args = self._parse_csv(self._parse_assignment) 6480 6481 if len(args) < 3: 6482 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6483 6484 expression, *expressions = args 6485 if not expression: 6486 return None 6487 6488 ifs = [] 6489 for search, result in zip(expressions[::2], expressions[1::2]): 6490 if not search or not result: 6491 return None 6492 6493 if isinstance(search, exp.Literal): 6494 ifs.append( 6495 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6496 ) 6497 elif isinstance(search, exp.Null): 6498 ifs.append( 6499 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6500 ) 6501 else: 6502 cond = exp.or_( 6503 exp.EQ(this=expression.copy(), expression=search), 6504 exp.and_( 6505 exp.Is(this=expression.copy(), expression=exp.Null()), 6506 exp.Is(this=search.copy(), expression=exp.Null()), 6507 copy=False, 6508 ), 6509 copy=False, 6510 ) 6511 ifs.append(exp.If(this=cond, true=result)) 6512 6513 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6514 6515 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6516 self._match_text_seq("KEY") 6517 key = self._parse_column() 6518 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6519 self._match_text_seq("VALUE") 6520 value = self._parse_bitwise() 6521 6522 if not key and not value: 6523 return None 6524 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6525 6526 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6527 if not this or not self._match_text_seq("FORMAT", "JSON"): 6528 return this 6529 6530 return self.expression(exp.FormatJson, this=this) 6531 6532 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6533 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6534 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6535 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6536 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6537 else: 6538 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6539 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6540 6541 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6542 6543 if not empty and not error and not null: 6544 return None 6545 6546 return self.expression( 6547 exp.OnCondition, 6548 empty=empty, 6549 error=error, 6550 null=null, 6551 ) 6552 6553 def _parse_on_handling( 6554 self, on: str, *values: str 6555 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6556 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6557 for value in values: 6558 if self._match_text_seq(value, "ON", on): 6559 return f"{value} ON {on}" 6560 6561 index = self._index 6562 if self._match(TokenType.DEFAULT): 6563 default_value = self._parse_bitwise() 6564 if self._match_text_seq("ON", on): 6565 return default_value 6566 6567 self._retreat(index) 6568 6569 return None 6570 6571 @t.overload 6572 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6573 6574 @t.overload 6575 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
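# Editor's note (illustrative sketch, not part of the original source): the overloads
# above only refine the return type; the implementation below parses both JSON_OBJECT
# and JSON_OBJECTAGG, including the ON NULL and WITH/WITHOUT UNIQUE KEYS clauses.
# Assuming only the public API:
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("SELECT JSON_OBJECT('a' VALUE 1)").selects[0]).__name__
#     'JSONObject'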
6576 6577 def _parse_json_object(self, agg=False): 6578 star = self._parse_star() 6579 expressions = ( 6580 [star] 6581 if star 6582 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6583 ) 6584 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6585 6586 unique_keys = None 6587 if self._match_text_seq("WITH", "UNIQUE"): 6588 unique_keys = True 6589 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6590 unique_keys = False 6591 6592 self._match_text_seq("KEYS") 6593 6594 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6595 self._parse_type() 6596 ) 6597 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6598 6599 return self.expression( 6600 exp.JSONObjectAgg if agg else exp.JSONObject, 6601 expressions=expressions, 6602 null_handling=null_handling, 6603 unique_keys=unique_keys, 6604 return_type=return_type, 6605 encoding=encoding, 6606 ) 6607 6608 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6609 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6610 if not self._match_text_seq("NESTED"): 6611 this = self._parse_id_var() 6612 kind = self._parse_types(allow_identifiers=False) 6613 nested = None 6614 else: 6615 this = None 6616 kind = None 6617 nested = True 6618 6619 path = self._match_text_seq("PATH") and self._parse_string() 6620 nested_schema = nested and self._parse_json_schema() 6621 6622 return self.expression( 6623 exp.JSONColumnDef, 6624 this=this, 6625 kind=kind, 6626 path=path, 6627 nested_schema=nested_schema, 6628 ) 6629 6630 def _parse_json_schema(self) -> exp.JSONSchema: 6631 self._match_text_seq("COLUMNS") 6632 return self.expression( 6633 exp.JSONSchema, 6634 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6635 ) 6636 6637 def _parse_json_table(self) -> exp.JSONTable: 6638 this = self._parse_format_json(self._parse_bitwise()) 6639 path = self._match(TokenType.COMMA) and self._parse_string() 6640 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6641 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6642 schema = self._parse_json_schema() 6643 6644 return exp.JSONTable( 6645 this=this, 6646 schema=schema, 6647 path=path, 6648 error_handling=error_handling, 6649 empty_handling=empty_handling, 6650 ) 6651 6652 def _parse_match_against(self) -> exp.MatchAgainst: 6653 expressions = self._parse_csv(self._parse_column) 6654 6655 self._match_text_seq(")", "AGAINST", "(") 6656 6657 this = self._parse_string() 6658 6659 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6660 modifier = "IN NATURAL LANGUAGE MODE" 6661 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6662 modifier = f"{modifier} WITH QUERY EXPANSION" 6663 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6664 modifier = "IN BOOLEAN MODE" 6665 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6666 modifier = "WITH QUERY EXPANSION" 6667 else: 6668 modifier = None 6669 6670 return self.expression( 6671 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6672 ) 6673 6674 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6675 def _parse_open_json(self) -> exp.OpenJSON: 6676 this = self._parse_bitwise() 6677 path = self._match(TokenType.COMMA) and self._parse_string() 6678 6679 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6680 this = self._parse_field(any_token=True) 6681 kind = self._parse_types() 6682 path = 
self._parse_string() 6683 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6684 6685 return self.expression( 6686 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6687 ) 6688 6689 expressions = None 6690 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6691 self._match_l_paren() 6692 expressions = self._parse_csv(_parse_open_json_column_def) 6693 6694 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6695 6696 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6697 args = self._parse_csv(self._parse_bitwise) 6698 6699 if self._match(TokenType.IN): 6700 return self.expression( 6701 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6702 ) 6703 6704 if haystack_first: 6705 haystack = seq_get(args, 0) 6706 needle = seq_get(args, 1) 6707 else: 6708 haystack = seq_get(args, 1) 6709 needle = seq_get(args, 0) 6710 6711 return self.expression( 6712 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6713 ) 6714 6715 def _parse_predict(self) -> exp.Predict: 6716 self._match_text_seq("MODEL") 6717 this = self._parse_table() 6718 6719 self._match(TokenType.COMMA) 6720 self._match_text_seq("TABLE") 6721 6722 return self.expression( 6723 exp.Predict, 6724 this=this, 6725 expression=self._parse_table(), 6726 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6727 ) 6728 6729 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6730 args = self._parse_csv(self._parse_table) 6731 return exp.JoinHint(this=func_name.upper(), expressions=args) 6732 6733 def _parse_substring(self) -> exp.Substring: 6734 # Postgres supports the form: substring(string [from int] [for int]) 6735 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6736 6737 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6738 6739 if self._match(TokenType.FROM): 6740 args.append(self._parse_bitwise()) 6741 if self._match(TokenType.FOR): 6742 if len(args) == 1: 6743 args.append(exp.Literal.number(1)) 6744 args.append(self._parse_bitwise()) 6745 6746 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6747 6748 def _parse_trim(self) -> exp.Trim: 6749 # https://www.w3resource.com/sql/character-functions/trim.php 6750 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6751 6752 position = None 6753 collation = None 6754 expression = None 6755 6756 if self._match_texts(self.TRIM_TYPES): 6757 position = self._prev.text.upper() 6758 6759 this = self._parse_bitwise() 6760 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6761 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6762 expression = self._parse_bitwise() 6763 6764 if invert_order: 6765 this, expression = expression, this 6766 6767 if self._match(TokenType.COLLATE): 6768 collation = self._parse_bitwise() 6769 6770 return self.expression( 6771 exp.Trim, this=this, position=position, expression=expression, collation=collation 6772 ) 6773 6774 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6775 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6776 6777 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6778 return self._parse_window(self._parse_id_var(), alias=True) 6779 6780 def _parse_respect_or_ignore_nulls( 6781 self, this: t.Optional[exp.Expression] 6782 ) -> t.Optional[exp.Expression]: 6783 if self._match_text_seq("IGNORE", "NULLS"): 
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

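    # For reference, _parse_window_spec consumes one side of a frame such as (illustrative):
    #   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
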
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

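    # For reference, placeholder syntax handled below varies by dialect (illustrative):
    # forms like ?, :name and $1, as registered in PLACEHOLDER_PARSERS.
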
    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

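    # For reference (illustrative): a statement like BEGIN IMMEDIATE TRANSACTION, where
    # any trailing space-separated VAR tokens are collected as transaction modes.
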
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

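    # For reference, a column addition handled above looks like (illustrative):
    #   ALTER TABLE t ADD COLUMN IF NOT EXISTS c INT FIRST
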
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

alter_set.set("file_format", [self._parse_field()]) 7336 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7337 alter_set.set("file_format", self._parse_wrapped_options()) 7338 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7339 alter_set.set("copy_options", self._parse_wrapped_options()) 7340 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7341 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7342 else: 7343 if self._match_text_seq("SERDE"): 7344 alter_set.set("serde", self._parse_field()) 7345 7346 alter_set.set("expressions", [self._parse_properties()]) 7347 7348 return alter_set 7349 7350 def _parse_alter(self) -> exp.Alter | exp.Command: 7351 start = self._prev 7352 7353 alter_token = self._match_set(self.ALTERABLES) and self._prev 7354 if not alter_token: 7355 return self._parse_as_command(start) 7356 7357 exists = self._parse_exists() 7358 only = self._match_text_seq("ONLY") 7359 this = self._parse_table(schema=True) 7360 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7361 7362 if self._next: 7363 self._advance() 7364 7365 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7366 if parser: 7367 actions = ensure_list(parser(self)) 7368 not_valid = self._match_text_seq("NOT", "VALID") 7369 options = self._parse_csv(self._parse_property) 7370 7371 if not self._curr and actions: 7372 return self.expression( 7373 exp.Alter, 7374 this=this, 7375 kind=alter_token.text.upper(), 7376 exists=exists, 7377 actions=actions, 7378 only=only, 7379 options=options, 7380 cluster=cluster, 7381 not_valid=not_valid, 7382 ) 7383 7384 return self._parse_as_command(start) 7385 7386 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7387 start = self._prev 7388 # https://duckdb.org/docs/sql/statements/analyze 7389 if not self._curr: 7390 return self.expression(exp.Analyze) 7391 7392 options = [] 7393 while self._match_texts(self.ANALYZE_STYLES): 7394 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7395 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7396 else: 7397 options.append(self._prev.text.upper()) 7398 7399 this: t.Optional[exp.Expression] = None 7400 inner_expression: t.Optional[exp.Expression] = None 7401 7402 kind = self._curr and self._curr.text.upper() 7403 7404 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7405 this = self._parse_table_parts() 7406 elif self._match_text_seq("TABLES"): 7407 if self._match_set((TokenType.FROM, TokenType.IN)): 7408 kind = f"{kind} {self._prev.text.upper()}" 7409 this = self._parse_table(schema=True, is_db_reference=True) 7410 elif self._match_text_seq("DATABASE"): 7411 this = self._parse_table(schema=True, is_db_reference=True) 7412 elif self._match_text_seq("CLUSTER"): 7413 this = self._parse_table() 7414 # Try matching inner expr keywords before fallback to parse table. 
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before falling back to parsing a table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind  https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts(("COLUMNS",)):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

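    # For reference, the shape handled by _parse_merge together with _parse_when_matched
    # below (illustrative):
    #   MERGE INTO t USING s ON t.id = s.id
    #   WHEN MATCHED THEN UPDATE SET t.v = s.v
    #   WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)
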
    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

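    # For reference (illustrative, hypothetical options table): given
    #   {"ISOLATION": (("LEVEL", "REPEATABLE", "READ"),)}
    # _parse_var_from_options folds the tokens ISOLATION LEVEL REPEATABLE READ into a
    # single exp.Var("ISOLATION LEVEL REPEATABLE READ").
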
    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

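    # For reference (illustrative): _find_parser drives multi-word keyword dispatch; it
    # walks the token stream against a trie (e.g. SHOW_TRIE) and returns the parser
    # registered for the longest matching key, such as a hypothetical "SHOW TABLES" entry.
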
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

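    # For reference (illustrative): in a typed lambda such as (x INT) -> x + 1,
    # _replace_lambda rewrites references to x inside the body and wraps them in a
    # CAST when the parameter carries a type.
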
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

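    # For reference, a statement in the shape handled below (illustrative):
    #   COPY INTO t FROM 's3://bucket/path' WITH (FILE_FORMAT = (TYPE = CSV))
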
8093 ) 8094 8095 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8096 8097 files = self._parse_csv(self._parse_file_location) 8098 credentials = self._parse_credentials() 8099 8100 self._match_text_seq("WITH") 8101 8102 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8103 8104 # Fallback case 8105 if self._curr: 8106 return self._parse_as_command(start) 8107 8108 return self.expression( 8109 exp.Copy, 8110 this=this, 8111 kind=kind, 8112 credentials=credentials, 8113 files=files, 8114 params=params, 8115 ) 8116 8117 def _parse_normalize(self) -> exp.Normalize: 8118 return self.expression( 8119 exp.Normalize, 8120 this=self._parse_bitwise(), 8121 form=self._match(TokenType.COMMA) and self._parse_var(), 8122 ) 8123 8124 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8125 args = self._parse_csv(lambda: self._parse_lambda()) 8126 8127 this = seq_get(args, 0) 8128 decimals = seq_get(args, 1) 8129 8130 return expr_type( 8131 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8132 ) 8133 8134 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8135 if self._match_text_seq("COLUMNS", "(", advance=False): 8136 this = self._parse_function() 8137 if isinstance(this, exp.Columns): 8138 this.set("unpack", True) 8139 return this 8140 8141 return self.expression( 8142 exp.Star, 8143 **{ # type: ignore 8144 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8145 "replace": self._parse_star_op("REPLACE"), 8146 "rename": self._parse_star_op("RENAME"), 8147 }, 8148 ) 8149 8150 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8151 privilege_parts = [] 8152 8153 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8154 # (end of privilege list) or L_PAREN (start of column list) are met 8155 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8156 privilege_parts.append(self._curr.text.upper()) 8157 self._advance() 8158 8159 this = exp.var(" ".join(privilege_parts)) 8160 expressions = ( 8161 self._parse_wrapped_csv(self._parse_column) 8162 if self._match(TokenType.L_PAREN, advance=False) 8163 else None 8164 ) 8165 8166 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8167 8168 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8169 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8170 principal = self._parse_id_var() 8171 8172 if not principal: 8173 return None 8174 8175 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8176 8177 def _parse_grant(self) -> exp.Grant | exp.Command: 8178 start = self._prev 8179 8180 privileges = self._parse_csv(self._parse_grant_privilege) 8181 8182 self._match(TokenType.ON) 8183 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8184 8185 # Attempt to parse the securable e.g. 
    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable; e.g. MySQL allows names such as "foo.*" and
        # "*.*", which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

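# A minimal usage sketch (illustrative); in practice sqlglot.parse_one wires the
# Tokenizer and Parser together for a chosen dialect:
#
#   from sqlglot import parser, tokens
#
#   sql = "SELECT a FROM t"
#   ast = parser.Parser().parse(tokens.Tokenizer().tokenize(sql), sql)[0]
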
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)

def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp

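# For reference (illustrative): in dialects with distinct array constructors, ARRAY[1, 2]
# and ARRAY(1, 2) are not interchangeable, so bracket_notation records which one was used.
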
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)

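# For reference (illustrative): CONVERT_TIMEZONE('UTC', 'America/New_York', ts) maps
# positionally, while the two-argument form CONVERT_TIMEZONE('America/New_York', ts)
# falls back to default_source_tz for the source timezone.
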
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
TokenType.VOID, 402 TokenType.OBJECT, 403 TokenType.OBJECT_IDENTIFIER, 404 TokenType.INET, 405 TokenType.IPADDRESS, 406 TokenType.IPPREFIX, 407 TokenType.IPV4, 408 TokenType.IPV6, 409 TokenType.UNKNOWN, 410 TokenType.NOTHING, 411 TokenType.NULL, 412 TokenType.NAME, 413 TokenType.TDIGEST, 414 TokenType.DYNAMIC, 415 *ENUM_TYPE_TOKENS, 416 *NESTED_TYPE_TOKENS, 417 *AGGREGATE_TYPE_TOKENS, 418 } 419 420 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 421 TokenType.BIGINT: TokenType.UBIGINT, 422 TokenType.INT: TokenType.UINT, 423 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 424 TokenType.SMALLINT: TokenType.USMALLINT, 425 TokenType.TINYINT: TokenType.UTINYINT, 426 TokenType.DECIMAL: TokenType.UDECIMAL, 427 TokenType.DOUBLE: TokenType.UDOUBLE, 428 } 429 430 SUBQUERY_PREDICATES = { 431 TokenType.ANY: exp.Any, 432 TokenType.ALL: exp.All, 433 TokenType.EXISTS: exp.Exists, 434 TokenType.SOME: exp.Any, 435 } 436 437 RESERVED_TOKENS = { 438 *Tokenizer.SINGLE_TOKENS.values(), 439 TokenType.SELECT, 440 } - {TokenType.IDENTIFIER} 441 442 DB_CREATABLES = { 443 TokenType.DATABASE, 444 TokenType.DICTIONARY, 445 TokenType.FILE_FORMAT, 446 TokenType.MODEL, 447 TokenType.NAMESPACE, 448 TokenType.SCHEMA, 449 TokenType.SEQUENCE, 450 TokenType.SINK, 451 TokenType.SOURCE, 452 TokenType.STAGE, 453 TokenType.STORAGE_INTEGRATION, 454 TokenType.STREAMLIT, 455 TokenType.TABLE, 456 TokenType.TAG, 457 TokenType.VIEW, 458 TokenType.WAREHOUSE, 459 } 460 461 CREATABLES = { 462 TokenType.COLUMN, 463 TokenType.CONSTRAINT, 464 TokenType.FOREIGN_KEY, 465 TokenType.FUNCTION, 466 TokenType.INDEX, 467 TokenType.PROCEDURE, 468 *DB_CREATABLES, 469 } 470 471 ALTERABLES = { 472 TokenType.INDEX, 473 TokenType.TABLE, 474 TokenType.VIEW, 475 } 476 477 # Tokens that can represent identifiers 478 ID_VAR_TOKENS = { 479 TokenType.ALL, 480 TokenType.ATTACH, 481 TokenType.VAR, 482 TokenType.ANTI, 483 TokenType.APPLY, 484 TokenType.ASC, 485 TokenType.ASOF, 486 TokenType.AUTO_INCREMENT, 487 TokenType.BEGIN, 488 TokenType.BPCHAR, 489 TokenType.CACHE, 490 TokenType.CASE, 491 TokenType.COLLATE, 492 TokenType.COMMAND, 493 TokenType.COMMENT, 494 TokenType.COMMIT, 495 TokenType.CONSTRAINT, 496 TokenType.COPY, 497 TokenType.CUBE, 498 TokenType.CURRENT_SCHEMA, 499 TokenType.DEFAULT, 500 TokenType.DELETE, 501 TokenType.DESC, 502 TokenType.DESCRIBE, 503 TokenType.DETACH, 504 TokenType.DICTIONARY, 505 TokenType.DIV, 506 TokenType.END, 507 TokenType.EXECUTE, 508 TokenType.EXPORT, 509 TokenType.ESCAPE, 510 TokenType.FALSE, 511 TokenType.FIRST, 512 TokenType.FILTER, 513 TokenType.FINAL, 514 TokenType.FORMAT, 515 TokenType.FULL, 516 TokenType.IDENTIFIER, 517 TokenType.IS, 518 TokenType.ISNULL, 519 TokenType.INTERVAL, 520 TokenType.KEEP, 521 TokenType.KILL, 522 TokenType.LEFT, 523 TokenType.LIMIT, 524 TokenType.LOAD, 525 TokenType.MERGE, 526 TokenType.NATURAL, 527 TokenType.NEXT, 528 TokenType.OFFSET, 529 TokenType.OPERATOR, 530 TokenType.ORDINALITY, 531 TokenType.OVERLAPS, 532 TokenType.OVERWRITE, 533 TokenType.PARTITION, 534 TokenType.PERCENT, 535 TokenType.PIVOT, 536 TokenType.PRAGMA, 537 TokenType.PUT, 538 TokenType.RANGE, 539 TokenType.RECURSIVE, 540 TokenType.REFERENCES, 541 TokenType.REFRESH, 542 TokenType.RENAME, 543 TokenType.REPLACE, 544 TokenType.RIGHT, 545 TokenType.ROLLUP, 546 TokenType.ROW, 547 TokenType.ROWS, 548 TokenType.SEMI, 549 TokenType.SET, 550 TokenType.SETTINGS, 551 TokenType.SHOW, 552 TokenType.TEMPORARY, 553 TokenType.TOP, 554 TokenType.TRUE, 555 TokenType.TRUNCATE, 556 TokenType.UNIQUE, 557 TokenType.UNNEST, 558 TokenType.UNPIVOT, 559 
TokenType.UPDATE, 560 TokenType.USE, 561 TokenType.VOLATILE, 562 TokenType.WINDOW, 563 *CREATABLES, 564 *SUBQUERY_PREDICATES, 565 *TYPE_TOKENS, 566 *NO_PAREN_FUNCTIONS, 567 } 568 ID_VAR_TOKENS.remove(TokenType.UNION) 569 570 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 571 TokenType.ANTI, 572 TokenType.APPLY, 573 TokenType.ASOF, 574 TokenType.FULL, 575 TokenType.LEFT, 576 TokenType.LOCK, 577 TokenType.NATURAL, 578 TokenType.RIGHT, 579 TokenType.SEMI, 580 TokenType.WINDOW, 581 } 582 583 ALIAS_TOKENS = ID_VAR_TOKENS 584 585 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 586 587 ARRAY_CONSTRUCTORS = { 588 "ARRAY": exp.Array, 589 "LIST": exp.List, 590 } 591 592 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 593 594 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 595 596 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 597 598 FUNC_TOKENS = { 599 TokenType.COLLATE, 600 TokenType.COMMAND, 601 TokenType.CURRENT_DATE, 602 TokenType.CURRENT_DATETIME, 603 TokenType.CURRENT_SCHEMA, 604 TokenType.CURRENT_TIMESTAMP, 605 TokenType.CURRENT_TIME, 606 TokenType.CURRENT_USER, 607 TokenType.FILTER, 608 TokenType.FIRST, 609 TokenType.FORMAT, 610 TokenType.GLOB, 611 TokenType.IDENTIFIER, 612 TokenType.INDEX, 613 TokenType.ISNULL, 614 TokenType.ILIKE, 615 TokenType.INSERT, 616 TokenType.LIKE, 617 TokenType.MERGE, 618 TokenType.NEXT, 619 TokenType.OFFSET, 620 TokenType.PRIMARY_KEY, 621 TokenType.RANGE, 622 TokenType.REPLACE, 623 TokenType.RLIKE, 624 TokenType.ROW, 625 TokenType.UNNEST, 626 TokenType.VAR, 627 TokenType.LEFT, 628 TokenType.RIGHT, 629 TokenType.SEQUENCE, 630 TokenType.DATE, 631 TokenType.DATETIME, 632 TokenType.TABLE, 633 TokenType.TIMESTAMP, 634 TokenType.TIMESTAMPTZ, 635 TokenType.TRUNCATE, 636 TokenType.WINDOW, 637 TokenType.XOR, 638 *TYPE_TOKENS, 639 *SUBQUERY_PREDICATES, 640 } 641 642 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 643 TokenType.AND: exp.And, 644 } 645 646 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.COLON_EQ: exp.PropertyEQ, 648 } 649 650 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.OR: exp.Or, 652 } 653 654 EQUALITY = { 655 TokenType.EQ: exp.EQ, 656 TokenType.NEQ: exp.NEQ, 657 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 658 } 659 660 COMPARISON = { 661 TokenType.GT: exp.GT, 662 TokenType.GTE: exp.GTE, 663 TokenType.LT: exp.LT, 664 TokenType.LTE: exp.LTE, 665 } 666 667 BITWISE = { 668 TokenType.AMP: exp.BitwiseAnd, 669 TokenType.CARET: exp.BitwiseXor, 670 TokenType.PIPE: exp.BitwiseOr, 671 } 672 673 TERM = { 674 TokenType.DASH: exp.Sub, 675 TokenType.PLUS: exp.Add, 676 TokenType.MOD: exp.Mod, 677 TokenType.COLLATE: exp.Collate, 678 } 679 680 FACTOR = { 681 TokenType.DIV: exp.IntDiv, 682 TokenType.LR_ARROW: exp.Distance, 683 TokenType.SLASH: exp.Div, 684 TokenType.STAR: exp.Mul, 685 } 686 687 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 688 689 TIMES = { 690 TokenType.TIME, 691 TokenType.TIMETZ, 692 } 693 694 TIMESTAMPS = { 695 TokenType.TIMESTAMP, 696 TokenType.TIMESTAMPNTZ, 697 TokenType.TIMESTAMPTZ, 698 TokenType.TIMESTAMPLTZ, 699 *TIMES, 700 } 701 702 SET_OPERATIONS = { 703 TokenType.UNION, 704 TokenType.INTERSECT, 705 TokenType.EXCEPT, 706 } 707 708 JOIN_METHODS = { 709 TokenType.ASOF, 710 TokenType.NATURAL, 711 TokenType.POSITIONAL, 712 } 713 714 JOIN_SIDES = { 715 TokenType.LEFT, 716 TokenType.RIGHT, 717 TokenType.FULL, 718 } 719 720 JOIN_KINDS = { 721 TokenType.ANTI, 722 TokenType.CROSS, 723 TokenType.INNER, 724 TokenType.OUTER, 725 TokenType.SEMI, 726 TokenType.STRAIGHT_JOIN, 727 } 
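# NOTE (editor's example): these token sets are plain class attributes, so dialect
# parsers customize them with ordinary set arithmetic rather than hooks. A minimal,
# hypothetical sketch for a dialect that also allows WINDOW as a table alias:
#
#     class MyParser(Parser):
#         TABLE_ALIAS_TOKENS = Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
#
# Derived sets such as TABLE_ALIAS_TOKENS are computed once at class-definition time,
# so overriding ID_VAR_TOKENS alone does not rederive them.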
728 729 JOIN_HINTS: t.Set[str] = set() 730 731 LAMBDAS = { 732 TokenType.ARROW: lambda self, expressions: self.expression( 733 exp.Lambda, 734 this=self._replace_lambda( 735 self._parse_assignment(), 736 expressions, 737 ), 738 expressions=expressions, 739 ), 740 TokenType.FARROW: lambda self, expressions: self.expression( 741 exp.Kwarg, 742 this=exp.var(expressions[0].name), 743 expression=self._parse_assignment(), 744 ), 745 } 746 747 COLUMN_OPERATORS = { 748 TokenType.DOT: None, 749 TokenType.DOTCOLON: lambda self, this, to: self.expression( 750 exp.JSONCast, 751 this=this, 752 to=to, 753 ), 754 TokenType.DCOLON: lambda self, this, to: self.expression( 755 exp.Cast if self.STRICT_CAST else exp.TryCast, 756 this=this, 757 to=to, 758 ), 759 TokenType.ARROW: lambda self, this, path: self.expression( 760 exp.JSONExtract, 761 this=this, 762 expression=self.dialect.to_json_path(path), 763 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 764 ), 765 TokenType.DARROW: lambda self, this, path: self.expression( 766 exp.JSONExtractScalar, 767 this=this, 768 expression=self.dialect.to_json_path(path), 769 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 770 ), 771 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 772 exp.JSONBExtract, 773 this=this, 774 expression=path, 775 ), 776 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 777 exp.JSONBExtractScalar, 778 this=this, 779 expression=path, 780 ), 781 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 782 exp.JSONBContains, 783 this=this, 784 expression=key, 785 ), 786 } 787 788 EXPRESSION_PARSERS = { 789 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 790 exp.Column: lambda self: self._parse_column(), 791 exp.Condition: lambda self: self._parse_assignment(), 792 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 793 exp.Expression: lambda self: self._parse_expression(), 794 exp.From: lambda self: self._parse_from(joins=True), 795 exp.Group: lambda self: self._parse_group(), 796 exp.Having: lambda self: self._parse_having(), 797 exp.Hint: lambda self: self._parse_hint_body(), 798 exp.Identifier: lambda self: self._parse_id_var(), 799 exp.Join: lambda self: self._parse_join(), 800 exp.Lambda: lambda self: self._parse_lambda(), 801 exp.Lateral: lambda self: self._parse_lateral(), 802 exp.Limit: lambda self: self._parse_limit(), 803 exp.Offset: lambda self: self._parse_offset(), 804 exp.Order: lambda self: self._parse_order(), 805 exp.Ordered: lambda self: self._parse_ordered(), 806 exp.Properties: lambda self: self._parse_properties(), 807 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 808 exp.Qualify: lambda self: self._parse_qualify(), 809 exp.Returning: lambda self: self._parse_returning(), 810 exp.Select: lambda self: self._parse_select(), 811 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 812 exp.Table: lambda self: self._parse_table_parts(), 813 exp.TableAlias: lambda self: self._parse_table_alias(), 814 exp.Tuple: lambda self: self._parse_value(values=False), 815 exp.Whens: lambda self: self._parse_when_matched(), 816 exp.Where: lambda self: self._parse_where(), 817 exp.Window: lambda self: self._parse_named_window(), 818 exp.With: lambda self: self._parse_with(), 819 "JOIN_TYPE": lambda self: self._parse_join_parts(), 820 } 821 822 STATEMENT_PARSERS = { 823 TokenType.ALTER: lambda self: self._parse_alter(), 824 TokenType.ANALYZE: lambda self: self._parse_analyze(), 825 TokenType.BEGIN: 
lambda self: self._parse_transaction(), 826 TokenType.CACHE: lambda self: self._parse_cache(), 827 TokenType.COMMENT: lambda self: self._parse_comment(), 828 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 829 TokenType.COPY: lambda self: self._parse_copy(), 830 TokenType.CREATE: lambda self: self._parse_create(), 831 TokenType.DELETE: lambda self: self._parse_delete(), 832 TokenType.DESC: lambda self: self._parse_describe(), 833 TokenType.DESCRIBE: lambda self: self._parse_describe(), 834 TokenType.DROP: lambda self: self._parse_drop(), 835 TokenType.GRANT: lambda self: self._parse_grant(), 836 TokenType.INSERT: lambda self: self._parse_insert(), 837 TokenType.KILL: lambda self: self._parse_kill(), 838 TokenType.LOAD: lambda self: self._parse_load(), 839 TokenType.MERGE: lambda self: self._parse_merge(), 840 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 841 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 842 TokenType.REFRESH: lambda self: self._parse_refresh(), 843 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 844 TokenType.SET: lambda self: self._parse_set(), 845 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 846 TokenType.UNCACHE: lambda self: self._parse_uncache(), 847 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 848 TokenType.UPDATE: lambda self: self._parse_update(), 849 TokenType.USE: lambda self: self._parse_use(), 850 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 851 } 852 853 UNARY_PARSERS = { 854 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 855 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 856 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 857 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 858 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 859 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 860 } 861 862 STRING_PARSERS = { 863 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 864 exp.RawString, this=token.text 865 ), 866 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 867 exp.National, this=token.text 868 ), 869 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 870 TokenType.STRING: lambda self, token: self.expression( 871 exp.Literal, this=token.text, is_string=True 872 ), 873 TokenType.UNICODE_STRING: lambda self, token: self.expression( 874 exp.UnicodeString, 875 this=token.text, 876 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 877 ), 878 } 879 880 NUMERIC_PARSERS = { 881 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 882 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 883 TokenType.HEX_STRING: lambda self, token: self.expression( 884 exp.HexString, 885 this=token.text, 886 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 887 ), 888 TokenType.NUMBER: lambda self, token: self.expression( 889 exp.Literal, this=token.text, is_string=False 890 ), 891 } 892 893 PRIMARY_PARSERS = { 894 **STRING_PARSERS, 895 **NUMERIC_PARSERS, 896 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 897 TokenType.NULL: lambda self, _: self.expression(exp.Null), 898 TokenType.TRUE: lambda self, _: 
self.expression(exp.Boolean, this=True), 899 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 900 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 901 TokenType.STAR: lambda self, _: self._parse_star_ops(), 902 } 903 904 PLACEHOLDER_PARSERS = { 905 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 906 TokenType.PARAMETER: lambda self: self._parse_parameter(), 907 TokenType.COLON: lambda self: ( 908 self.expression(exp.Placeholder, this=self._prev.text) 909 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 910 else None 911 ), 912 } 913 914 RANGE_PARSERS = { 915 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 916 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 917 TokenType.GLOB: binary_range_parser(exp.Glob), 918 TokenType.ILIKE: binary_range_parser(exp.ILike), 919 TokenType.IN: lambda self, this: self._parse_in(this), 920 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 921 TokenType.IS: lambda self, this: self._parse_is(this), 922 TokenType.LIKE: binary_range_parser(exp.Like), 923 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 924 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 925 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 926 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 927 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 928 } 929 930 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 931 "ALLOWED_VALUES": lambda self: self.expression( 932 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 933 ), 934 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 935 "AUTO": lambda self: self._parse_auto_property(), 936 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 937 "BACKUP": lambda self: self.expression( 938 exp.BackupProperty, this=self._parse_var(any_token=True) 939 ), 940 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 941 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 942 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 943 "CHECKSUM": lambda self: self._parse_checksum(), 944 "CLUSTER BY": lambda self: self._parse_cluster(), 945 "CLUSTERED": lambda self: self._parse_clustered_by(), 946 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 947 exp.CollateProperty, **kwargs 948 ), 949 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 950 "CONTAINS": lambda self: self._parse_contains_property(), 951 "COPY": lambda self: self._parse_copy_property(), 952 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 953 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 954 "DEFINER": lambda self: self._parse_definer(), 955 "DETERMINISTIC": lambda self: self.expression( 956 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 957 ), 958 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 959 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 960 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 961 "DISTKEY": lambda self: self._parse_distkey(), 962 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 963 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 964 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 965 "EXECUTE": lambda self: 
self._parse_property_assignment(exp.ExecuteAsProperty), 966 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 967 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 968 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 969 "FREESPACE": lambda self: self._parse_freespace(), 970 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 971 "HEAP": lambda self: self.expression(exp.HeapProperty), 972 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 973 "IMMUTABLE": lambda self: self.expression( 974 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 975 ), 976 "INHERITS": lambda self: self.expression( 977 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 978 ), 979 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 980 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 981 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 982 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 983 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 984 "LIKE": lambda self: self._parse_create_like(), 985 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 986 "LOCK": lambda self: self._parse_locking(), 987 "LOCKING": lambda self: self._parse_locking(), 988 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 989 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 990 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 991 "MODIFIES": lambda self: self._parse_modifies_property(), 992 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 993 "NO": lambda self: self._parse_no_property(), 994 "ON": lambda self: self._parse_on_property(), 995 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 996 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 997 "PARTITION": lambda self: self._parse_partitioned_of(), 998 "PARTITION BY": lambda self: self._parse_partitioned_by(), 999 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1000 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1001 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1002 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1003 "READS": lambda self: self._parse_reads_property(), 1004 "REMOTE": lambda self: self._parse_remote_with_connection(), 1005 "RETURNS": lambda self: self._parse_returns(), 1006 "STRICT": lambda self: self.expression(exp.StrictProperty), 1007 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1008 "ROW": lambda self: self._parse_row(), 1009 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1010 "SAMPLE": lambda self: self.expression( 1011 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1012 ), 1013 "SECURE": lambda self: self.expression(exp.SecureProperty), 1014 "SECURITY": lambda self: self._parse_security(), 1015 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1016 "SETTINGS": lambda self: self._parse_settings_property(), 1017 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1018 "SORTKEY": lambda self: self._parse_sortkey(), 1019 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1020 "STABLE": lambda self: self.expression( 1021 exp.StabilityProperty, 
this=exp.Literal.string("STABLE") 1022 ), 1023 "STORED": lambda self: self._parse_stored(), 1024 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1025 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1026 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1027 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1028 "TO": lambda self: self._parse_to_table(), 1029 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1030 "TRANSFORM": lambda self: self.expression( 1031 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1032 ), 1033 "TTL": lambda self: self._parse_ttl(), 1034 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1035 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1036 "VOLATILE": lambda self: self._parse_volatile_property(), 1037 "WITH": lambda self: self._parse_with_property(), 1038 } 1039 1040 CONSTRAINT_PARSERS = { 1041 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1042 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1043 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1044 "CHARACTER SET": lambda self: self.expression( 1045 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1046 ), 1047 "CHECK": lambda self: self.expression( 1048 exp.CheckColumnConstraint, 1049 this=self._parse_wrapped(self._parse_assignment), 1050 enforced=self._match_text_seq("ENFORCED"), 1051 ), 1052 "COLLATE": lambda self: self.expression( 1053 exp.CollateColumnConstraint, 1054 this=self._parse_identifier() or self._parse_column(), 1055 ), 1056 "COMMENT": lambda self: self.expression( 1057 exp.CommentColumnConstraint, this=self._parse_string() 1058 ), 1059 "COMPRESS": lambda self: self._parse_compress(), 1060 "CLUSTERED": lambda self: self.expression( 1061 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1062 ), 1063 "NONCLUSTERED": lambda self: self.expression( 1064 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1065 ), 1066 "DEFAULT": lambda self: self.expression( 1067 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1068 ), 1069 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1070 "EPHEMERAL": lambda self: self.expression( 1071 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1072 ), 1073 "EXCLUDE": lambda self: self.expression( 1074 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1075 ), 1076 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1077 "FORMAT": lambda self: self.expression( 1078 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1079 ), 1080 "GENERATED": lambda self: self._parse_generated_as_identity(), 1081 "IDENTITY": lambda self: self._parse_auto_increment(), 1082 "INLINE": lambda self: self._parse_inline(), 1083 "LIKE": lambda self: self._parse_create_like(), 1084 "NOT": lambda self: self._parse_not_constraint(), 1085 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1086 "ON": lambda self: ( 1087 self._match(TokenType.UPDATE) 1088 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1089 ) 1090 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1091 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1092 "PERIOD": lambda self: self._parse_period_for_system_time(), 1093 
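# (editor's note) Keys in this table are matched against the upcoming keyword text, so
# for a column such as "x INT DEFAULT 0 NOT NULL" the "DEFAULT" and "NOT" entries above
# produce an exp.DefaultColumnConstraint and an exp.NotNullColumnConstraint respectively.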
"PRIMARY KEY": lambda self: self._parse_primary_key(), 1094 "REFERENCES": lambda self: self._parse_references(match=False), 1095 "TITLE": lambda self: self.expression( 1096 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1097 ), 1098 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1099 "UNIQUE": lambda self: self._parse_unique(), 1100 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1101 "WATERMARK": lambda self: self.expression( 1102 exp.WatermarkColumnConstraint, 1103 this=self._match(TokenType.FOR) and self._parse_column(), 1104 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1105 ), 1106 "WITH": lambda self: self.expression( 1107 exp.Properties, expressions=self._parse_wrapped_properties() 1108 ), 1109 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1110 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1111 } 1112 1113 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1114 klass = ( 1115 exp.PartitionedByBucket 1116 if self._prev.text.upper() == "BUCKET" 1117 else exp.PartitionByTruncate 1118 ) 1119 1120 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1121 this, expression = seq_get(args, 0), seq_get(args, 1) 1122 1123 if isinstance(this, exp.Literal): 1124 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1125 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1126 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1127 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1128 # 1129 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1130 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1131 this, expression = expression, this 1132 1133 return self.expression(klass, this=this, expression=expression) 1134 1135 ALTER_PARSERS = { 1136 "ADD": lambda self: self._parse_alter_table_add(), 1137 "AS": lambda self: self._parse_select(), 1138 "ALTER": lambda self: self._parse_alter_table_alter(), 1139 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1140 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1141 "DROP": lambda self: self._parse_alter_table_drop(), 1142 "RENAME": lambda self: self._parse_alter_table_rename(), 1143 "SET": lambda self: self._parse_alter_table_set(), 1144 "SWAP": lambda self: self.expression( 1145 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1146 ), 1147 } 1148 1149 ALTER_ALTER_PARSERS = { 1150 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1151 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1152 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1153 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1154 } 1155 1156 SCHEMA_UNNAMED_CONSTRAINTS = { 1157 "CHECK", 1158 "EXCLUDE", 1159 "FOREIGN KEY", 1160 "LIKE", 1161 "PERIOD", 1162 "PRIMARY KEY", 1163 "UNIQUE", 1164 "WATERMARK", 1165 "BUCKET", 1166 "TRUNCATE", 1167 } 1168 1169 NO_PAREN_FUNCTION_PARSERS = { 1170 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1171 "CASE": lambda self: self._parse_case(), 1172 "CONNECT_BY_ROOT": lambda self: self.expression( 1173 
exp.ConnectByRoot, this=self._parse_column() 1174 ), 1175 "IF": lambda self: self._parse_if(), 1176 } 1177 1178 INVALID_FUNC_NAME_TOKENS = { 1179 TokenType.IDENTIFIER, 1180 TokenType.STRING, 1181 } 1182 1183 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1184 1185 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1186 1187 FUNCTION_PARSERS = { 1188 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1189 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1190 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1191 "DECODE": lambda self: self._parse_decode(), 1192 "EXTRACT": lambda self: self._parse_extract(), 1193 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1194 "GAP_FILL": lambda self: self._parse_gap_fill(), 1195 "JSON_OBJECT": lambda self: self._parse_json_object(), 1196 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1197 "JSON_TABLE": lambda self: self._parse_json_table(), 1198 "MATCH": lambda self: self._parse_match_against(), 1199 "NORMALIZE": lambda self: self._parse_normalize(), 1200 "OPENJSON": lambda self: self._parse_open_json(), 1201 "OVERLAY": lambda self: self._parse_overlay(), 1202 "POSITION": lambda self: self._parse_position(), 1203 "PREDICT": lambda self: self._parse_predict(), 1204 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1205 "STRING_AGG": lambda self: self._parse_string_agg(), 1206 "SUBSTRING": lambda self: self._parse_substring(), 1207 "TRIM": lambda self: self._parse_trim(), 1208 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1209 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1210 "XMLELEMENT": lambda self: self.expression( 1211 exp.XMLElement, 1212 this=self._match_text_seq("NAME") and self._parse_id_var(), 1213 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1214 ), 1215 "XMLTABLE": lambda self: self._parse_xml_table(), 1216 } 1217 1218 QUERY_MODIFIER_PARSERS = { 1219 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1220 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1221 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1222 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1223 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1224 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1225 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1226 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1227 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1228 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1229 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1230 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1231 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1232 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1233 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1234 TokenType.CLUSTER_BY: lambda self: ( 1235 "cluster", 1236 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1237 ), 1238 TokenType.DISTRIBUTE_BY: lambda self: ( 1239 "distribute", 1240 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1241 ), 1242 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1243 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1244 
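# (editor's note) Each entry returns an (arg_name, expression) pair that is attached to
# the enclosing query node under arg_name, which is why the FETCH and LIMIT entries
# above both land under the "limit" key.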
TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1245 } 1246 1247 SET_PARSERS = { 1248 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1249 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1250 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1251 "TRANSACTION": lambda self: self._parse_set_transaction(), 1252 } 1253 1254 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1255 1256 TYPE_LITERAL_PARSERS = { 1257 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1258 } 1259 1260 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1261 1262 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1263 1264 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1265 1266 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1267 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1268 "ISOLATION": ( 1269 ("LEVEL", "REPEATABLE", "READ"), 1270 ("LEVEL", "READ", "COMMITTED"), 1271 ("LEVEL", "READ", "UNCOMMITTED"), 1272 ("LEVEL", "SERIALIZABLE"), 1273 ), 1274 "READ": ("WRITE", "ONLY"), 1275 } 1276 1277 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1278 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1279 ) 1280 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1281 1282 CREATE_SEQUENCE: OPTIONS_TYPE = { 1283 "SCALE": ("EXTEND", "NOEXTEND"), 1284 "SHARD": ("EXTEND", "NOEXTEND"), 1285 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1286 **dict.fromkeys( 1287 ( 1288 "SESSION", 1289 "GLOBAL", 1290 "KEEP", 1291 "NOKEEP", 1292 "ORDER", 1293 "NOORDER", 1294 "NOCACHE", 1295 "CYCLE", 1296 "NOCYCLE", 1297 "NOMINVALUE", 1298 "NOMAXVALUE", 1299 "NOSCALE", 1300 "NOSHARD", 1301 ), 1302 tuple(), 1303 ), 1304 } 1305 1306 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1307 1308 USABLES: OPTIONS_TYPE = dict.fromkeys( 1309 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1310 ) 1311 1312 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1313 1314 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1315 "TYPE": ("EVOLUTION",), 1316 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1317 } 1318 1319 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1320 1321 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1322 1323 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1324 "NOT": ("ENFORCED",), 1325 "MATCH": ( 1326 "FULL", 1327 "PARTIAL", 1328 "SIMPLE", 1329 ), 1330 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1331 "USING": ( 1332 "BTREE", 1333 "HASH", 1334 ), 1335 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1336 } 1337 1338 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1339 1340 CLONE_KEYWORDS = {"CLONE", "COPY"} 1341 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1342 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1343 1344 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1345 1346 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1347 1348 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1349 1350 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1351 1352 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1353 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1354 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1355 1356 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1357
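# NOTE (editor's example): the OPTIONS_TYPE tables above map a leading keyword to the
# keyword sequences that may follow it, and are consumed by _parse_var_from_options
# (see its use with CREATE_SEQUENCE further below). A minimal sketch of the data shape,
# with hypothetical option names:
#
#     MY_OPTIONS: OPTIONS_TYPE = {
#         "FOO": ("BAR",),                           # matches the two-word option FOO BAR
#         **dict.fromkeys(("BAZ", "QUX"), tuple()),  # matches bare BAZ or QUX
#     }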
1358 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1359 1360 ADD_CONSTRAINT_TOKENS = { 1361 TokenType.CONSTRAINT, 1362 TokenType.FOREIGN_KEY, 1363 TokenType.INDEX, 1364 TokenType.KEY, 1365 TokenType.PRIMARY_KEY, 1366 TokenType.UNIQUE, 1367 } 1368 1369 DISTINCT_TOKENS = {TokenType.DISTINCT} 1370 1371 NULL_TOKENS = {TokenType.NULL} 1372 1373 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1374 1375 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1376 1377 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1378 1379 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1380 1381 ODBC_DATETIME_LITERALS = { 1382 "d": exp.Date, 1383 "t": exp.Time, 1384 "ts": exp.Timestamp, 1385 } 1386 1387 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1388 1389 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1390 1391 # The style options for the DESCRIBE statement 1392 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1393 1394 # The style options for the ANALYZE statement 1395 ANALYZE_STYLES = { 1396 "BUFFER_USAGE_LIMIT", 1397 "FULL", 1398 "LOCAL", 1399 "NO_WRITE_TO_BINLOG", 1400 "SAMPLE", 1401 "SKIP_LOCKED", 1402 "VERBOSE", 1403 } 1404 1405 ANALYZE_EXPRESSION_PARSERS = { 1406 "ALL": lambda self: self._parse_analyze_columns(), 1407 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1408 "DELETE": lambda self: self._parse_analyze_delete(), 1409 "DROP": lambda self: self._parse_analyze_histogram(), 1410 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1411 "LIST": lambda self: self._parse_analyze_list(), 1412 "PREDICATE": lambda self: self._parse_analyze_columns(), 1413 "UPDATE": lambda self: self._parse_analyze_histogram(), 1414 "VALIDATE": lambda self: self._parse_analyze_validate(), 1415 } 1416 1417 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1418 1419 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1420 1421 OPERATION_MODIFIERS: t.Set[str] = set() 1422 1423 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1424 1425 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1426 1427 STRICT_CAST = True 1428 1429 PREFIXED_PIVOT_COLUMNS = False 1430 IDENTIFY_PIVOT_STRINGS = False 1431 1432 LOG_DEFAULTS_TO_LN = False 1433 1434 # Whether ADD is present for each column added by ALTER TABLE 1435 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1436 1437 # Whether the table sample clause expects CSV syntax 1438 TABLESAMPLE_CSV = False 1439 1440 # The default method used for table sampling 1441 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1442 1443 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1444 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1445 1446 # Whether the TRIM function expects the characters to trim as its first argument 1447 TRIM_PATTERN_FIRST = False 1448 1449 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1450 STRING_ALIASES = False 1451 1452 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1453 MODIFIERS_ATTACHED_TO_SET_OP = True 1454 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1455 1456 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1457 NO_PAREN_IF_COMMANDS = True 1458 1459 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres) 1460 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1461 1462 # Whether the `:` operator is used to extract a value from a VARIANT column 1463 COLON_IS_VARIANT_EXTRACT = False 1464 1465 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1466 # If this is True and '(' is not found, the keyword will be treated as an identifier 1467 VALUES_FOLLOWED_BY_PAREN = True 1468 1469 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1470 SUPPORTS_IMPLICIT_UNNEST = False 1471 1472 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1473 INTERVAL_SPANS = True 1474 1475 # Whether a PARTITION clause can follow a table reference 1476 SUPPORTS_PARTITION_SELECTION = False 1477 1478 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1479 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1480 1481 # Whether the 'AS' keyword is optional in the CTE definition syntax 1482 OPTIONAL_ALIAS_TOKEN_CTE = True 1483 1484 __slots__ = ( 1485 "error_level", 1486 "error_message_context", 1487 "max_errors", 1488 "dialect", 1489 "sql", 1490 "errors", 1491 "_tokens", 1492 "_index", 1493 "_curr", 1494 "_next", 1495 "_prev", 1496 "_prev_comments", 1497 ) 1498 1499 # Autofilled 1500 SHOW_TRIE: t.Dict = {} 1501 SET_TRIE: t.Dict = {} 1502 1503 def __init__( 1504 self, 1505 error_level: t.Optional[ErrorLevel] = None, 1506 error_message_context: int = 100, 1507 max_errors: int = 3, 1508 dialect: DialectType = None, 1509 ): 1510 from sqlglot.dialects import Dialect 1511 1512 self.error_level = error_level or ErrorLevel.IMMEDIATE 1513 self.error_message_context = error_message_context 1514 self.max_errors = max_errors 1515 self.dialect = Dialect.get_or_raise(dialect) 1516 self.reset() 1517 1518 def reset(self): 1519 self.sql = "" 1520 self.errors = [] 1521 self._tokens = [] 1522 self._index = 0 1523 self._curr = None 1524 self._next = None 1525 self._prev = None 1526 self._prev_comments = None 1527 1528 def parse( 1529 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1530 ) -> t.List[t.Optional[exp.Expression]]: 1531 """ 1532 Parses a list of tokens and returns a list of syntax trees, one tree 1533 per parsed SQL statement. 1534 1535 Args: 1536 raw_tokens: The list of tokens. 1537 sql: The original SQL string, used to produce helpful debug messages. 1538 1539 Returns: 1540 The list of the produced syntax trees. 1541 """ 1542 return self._parse( 1543 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1544 ) 1545 1546 def parse_into( 1547 self, 1548 expression_types: exp.IntoType, 1549 raw_tokens: t.List[Token], 1550 sql: t.Optional[str] = None, 1551 ) -> t.List[t.Optional[exp.Expression]]: 1552 """ 1553 Parses a list of tokens into a given Expression type. If a collection of Expression 1554 types is given instead, this method will try to parse the token list into each one 1555 of them, stopping at the first for which the parsing succeeds. 1556 1557 Args: 1558 expression_types: The expression type(s) to try and parse the token list into. 1559 raw_tokens: The list of tokens. 1560 sql: The original SQL string, used to produce helpful debug messages. 1561 1562 Returns: 1563 The target Expression. 
1564 """ 1565 errors = [] 1566 for expression_type in ensure_list(expression_types): 1567 parser = self.EXPRESSION_PARSERS.get(expression_type) 1568 if not parser: 1569 raise TypeError(f"No parser registered for {expression_type}") 1570 1571 try: 1572 return self._parse(parser, raw_tokens, sql) 1573 except ParseError as e: 1574 e.errors[0]["into_expression"] = expression_type 1575 errors.append(e) 1576 1577 raise ParseError( 1578 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1579 errors=merge_errors(errors), 1580 ) from errors[-1] 1581 1582 def _parse( 1583 self, 1584 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1585 raw_tokens: t.List[Token], 1586 sql: t.Optional[str] = None, 1587 ) -> t.List[t.Optional[exp.Expression]]: 1588 self.reset() 1589 self.sql = sql or "" 1590 1591 total = len(raw_tokens) 1592 chunks: t.List[t.List[Token]] = [[]] 1593 1594 for i, token in enumerate(raw_tokens): 1595 if token.token_type == TokenType.SEMICOLON: 1596 if token.comments: 1597 chunks.append([token]) 1598 1599 if i < total - 1: 1600 chunks.append([]) 1601 else: 1602 chunks[-1].append(token) 1603 1604 expressions = [] 1605 1606 for tokens in chunks: 1607 self._index = -1 1608 self._tokens = tokens 1609 self._advance() 1610 1611 expressions.append(parse_method(self)) 1612 1613 if self._index < len(self._tokens): 1614 self.raise_error("Invalid expression / Unexpected token") 1615 1616 self.check_errors() 1617 1618 return expressions 1619 1620 def check_errors(self) -> None: 1621 """Logs or raises any found errors, depending on the chosen error level setting.""" 1622 if self.error_level == ErrorLevel.WARN: 1623 for error in self.errors: 1624 logger.error(str(error)) 1625 elif self.error_level == ErrorLevel.RAISE and self.errors: 1626 raise ParseError( 1627 concat_messages(self.errors, self.max_errors), 1628 errors=merge_errors(self.errors), 1629 ) 1630 1631 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1632 """ 1633 Appends an error in the list of recorded errors or raises it, depending on the chosen 1634 error level setting. 1635 """ 1636 token = token or self._curr or self._prev or Token.string("") 1637 start = token.start 1638 end = token.end + 1 1639 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1640 highlight = self.sql[start:end] 1641 end_context = self.sql[end : end + self.error_message_context] 1642 1643 error = ParseError.new( 1644 f"{message}. Line {token.line}, Col: {token.col}.\n" 1645 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1646 description=message, 1647 line=token.line, 1648 col=token.col, 1649 start_context=start_context, 1650 highlight=highlight, 1651 end_context=end_context, 1652 ) 1653 1654 if self.error_level == ErrorLevel.IMMEDIATE: 1655 raise error 1656 1657 self.errors.append(error) 1658 1659 def expression( 1660 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1661 ) -> E: 1662 """ 1663 Creates a new, validated Expression. 1664 1665 Args: 1666 exp_class: The expression class to instantiate. 1667 comments: An optional list of comments to attach to the expression. 1668 kwargs: The arguments to set for the expression along with their respective values. 1669 1670 Returns: 1671 The target expression. 
1672 """ 1673 instance = exp_class(**kwargs) 1674 instance.add_comments(comments) if comments else self._add_comments(instance) 1675 return self.validate_expression(instance) 1676 1677 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1678 if expression and self._prev_comments: 1679 expression.add_comments(self._prev_comments) 1680 self._prev_comments = None 1681 1682 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1683 """ 1684 Validates an Expression, making sure that all its mandatory arguments are set. 1685 1686 Args: 1687 expression: The expression to validate. 1688 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1689 1690 Returns: 1691 The validated expression. 1692 """ 1693 if self.error_level != ErrorLevel.IGNORE: 1694 for error_message in expression.error_messages(args): 1695 self.raise_error(error_message) 1696 1697 return expression 1698 1699 def _find_sql(self, start: Token, end: Token) -> str: 1700 return self.sql[start.start : end.end + 1] 1701 1702 def _is_connected(self) -> bool: 1703 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1704 1705 def _advance(self, times: int = 1) -> None: 1706 self._index += times 1707 self._curr = seq_get(self._tokens, self._index) 1708 self._next = seq_get(self._tokens, self._index + 1) 1709 1710 if self._index > 0: 1711 self._prev = self._tokens[self._index - 1] 1712 self._prev_comments = self._prev.comments 1713 else: 1714 self._prev = None 1715 self._prev_comments = None 1716 1717 def _retreat(self, index: int) -> None: 1718 if index != self._index: 1719 self._advance(index - self._index) 1720 1721 def _warn_unsupported(self) -> None: 1722 if len(self._tokens) <= 1: 1723 return 1724 1725 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1726 # interested in emitting a warning for the one being currently processed. 1727 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1728 1729 logger.warning( 1730 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1731 ) 1732 1733 def _parse_command(self) -> exp.Command: 1734 self._warn_unsupported() 1735 return self.expression( 1736 exp.Command, 1737 comments=self._prev_comments, 1738 this=self._prev.text.upper(), 1739 expression=self._parse_string(), 1740 ) 1741 1742 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1743 """ 1744 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1745 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1746 solve this by setting & resetting the parser state accordingly 1747 """ 1748 index = self._index 1749 error_level = self.error_level 1750 1751 self.error_level = ErrorLevel.IMMEDIATE 1752 try: 1753 this = parse_method() 1754 except ParseError: 1755 this = None 1756 finally: 1757 if not this or retreat: 1758 self._retreat(index) 1759 self.error_level = error_level 1760 1761 return this 1762 1763 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1764 start = self._prev 1765 exists = self._parse_exists() if allow_exists else None 1766 1767 self._match(TokenType.ON) 1768 1769 materialized = self._match_text_seq("MATERIALIZED") 1770 kind = self._match_set(self.CREATABLES) and self._prev 1771 if not kind: 1772 return self._parse_as_command(start) 1773 1774 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1775 this = self._parse_user_defined_function(kind=kind.token_type) 1776 elif kind.token_type == TokenType.TABLE: 1777 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1778 elif kind.token_type == TokenType.COLUMN: 1779 this = self._parse_column() 1780 else: 1781 this = self._parse_id_var() 1782 1783 self._match(TokenType.IS) 1784 1785 return self.expression( 1786 exp.Comment, 1787 this=this, 1788 kind=kind.text, 1789 expression=self._parse_string(), 1790 exists=exists, 1791 materialized=materialized, 1792 ) 1793 1794 def _parse_to_table( 1795 self, 1796 ) -> exp.ToTableProperty: 1797 table = self._parse_table_parts(schema=True) 1798 return self.expression(exp.ToTableProperty, this=table) 1799 1800 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1801 def _parse_ttl(self) -> exp.Expression: 1802 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1803 this = self._parse_bitwise() 1804 1805 if self._match_text_seq("DELETE"): 1806 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1807 if self._match_text_seq("RECOMPRESS"): 1808 return self.expression( 1809 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1810 ) 1811 if self._match_text_seq("TO", "DISK"): 1812 return self.expression( 1813 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1814 ) 1815 if self._match_text_seq("TO", "VOLUME"): 1816 return self.expression( 1817 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1818 ) 1819 1820 return this 1821 1822 expressions = self._parse_csv(_parse_ttl_action) 1823 where = self._parse_where() 1824 group = self._parse_group() 1825 1826 aggregates = None 1827 if group and self._match(TokenType.SET): 1828 aggregates = self._parse_csv(self._parse_set_item) 1829 1830 return self.expression( 1831 exp.MergeTreeTTL, 1832 expressions=expressions, 1833 where=where, 1834 group=group, 1835 aggregates=aggregates, 1836 ) 1837 1838 def _parse_statement(self) -> t.Optional[exp.Expression]: 1839 if self._curr is None: 1840 return None 1841 1842 if self._match_set(self.STATEMENT_PARSERS): 1843 comments = self._prev_comments 1844 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1845 stmt.add_comments(comments, prepend=True) 1846 return stmt 1847 1848 if self._match_set(self.dialect.tokenizer.COMMANDS): 1849 return self._parse_command() 1850 1851 expression = self._parse_expression() 1852 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1853 return self._parse_query_modifiers(expression)
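# NOTE (editor's example): a minimal end-to-end sketch of the entry points above, using
# the module-level Tokenizer import to produce tokens. _parse splits the token list on
# semicolons and feeds each chunk through _parse_statement:
#
#     sql = "SELECT a FROM t; DROP TABLE t"
#     parser = Parser(error_level=ErrorLevel.RAISE)
#     trees = parser.parse(Tokenizer().tokenize(sql), sql)  # [exp.Select, exp.Drop]
#
#     # parse_into dispatches through EXPRESSION_PARSERS instead:
#     where = parser.parse_into(exp.Where, Tokenizer().tokenize("WHERE x > 1"))[0]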
1854 1855 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1856 start = self._prev 1857 temporary = self._match(TokenType.TEMPORARY) 1858 materialized = self._match_text_seq("MATERIALIZED") 1859 1860 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1861 if not kind: 1862 return self._parse_as_command(start) 1863 1864 concurrently = self._match_text_seq("CONCURRENTLY") 1865 if_exists = exists or self._parse_exists() 1866 1867 if kind == "COLUMN": 1868 this = self._parse_column() 1869 else: 1870 this = self._parse_table_parts( 1871 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1872 ) 1873 1874 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1875 1876 if self._match(TokenType.L_PAREN, advance=False): 1877 expressions = self._parse_wrapped_csv(self._parse_types) 1878 else: 1879 expressions = None 1880 1881 return self.expression( 1882 exp.Drop, 1883 exists=if_exists, 1884 this=this, 1885 expressions=expressions, 1886 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1887 temporary=temporary, 1888 materialized=materialized, 1889 cascade=self._match_text_seq("CASCADE"), 1890 constraints=self._match_text_seq("CONSTRAINTS"), 1891 purge=self._match_text_seq("PURGE"), 1892 cluster=cluster, 1893 concurrently=concurrently, 1894 ) 1895 1896 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1897 return ( 1898 self._match_text_seq("IF") 1899 and (not not_ or self._match(TokenType.NOT)) 1900 and self._match(TokenType.EXISTS) 1901 ) 1902 1903 def _parse_create(self) -> exp.Create | exp.Command: 1904 # Note: this can't be None because we've matched a statement parser 1905 start = self._prev 1906 1907 replace = ( 1908 start.token_type == TokenType.REPLACE 1909 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1910 or self._match_pair(TokenType.OR, TokenType.ALTER) 1911 ) 1912 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1913 1914 unique = self._match(TokenType.UNIQUE) 1915 1916 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1917 clustered = True 1918 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1919 "COLUMNSTORE" 1920 ): 1921 clustered = False 1922 else: 1923 clustered = None 1924 1925 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1926 self._advance() 1927 1928 properties = None 1929 create_token = self._match_set(self.CREATABLES) and self._prev 1930 1931 if not create_token: 1932 # exp.Properties.Location.POST_CREATE 1933 properties = self._parse_properties() 1934 create_token = self._match_set(self.CREATABLES) and self._prev 1935 1936 if not properties or not create_token: 1937 return self._parse_as_command(start) 1938 1939 concurrently = self._match_text_seq("CONCURRENTLY") 1940 exists = self._parse_exists(not_=True) 1941 this = None 1942 expression: t.Optional[exp.Expression] = None 1943 indexes = None 1944 no_schema_binding = None 1945 begin = None 1946 end = None 1947 clone = None 1948 1949 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1950 nonlocal properties 1951 if properties and temp_props: 1952 properties.expressions.extend(temp_props.expressions) 1953 elif temp_props: 1954 properties = temp_props 1955 1956 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1957 this = self._parse_user_defined_function(kind=create_token.token_type) 1958 1959 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1960 
extend_props(self._parse_properties()) 1961 1962 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1963 extend_props(self._parse_properties()) 1964 1965 if not expression: 1966 if self._match(TokenType.COMMAND): 1967 expression = self._parse_as_command(self._prev) 1968 else: 1969 begin = self._match(TokenType.BEGIN) 1970 return_ = self._match_text_seq("RETURN") 1971 1972 if self._match(TokenType.STRING, advance=False): 1973 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1974 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1975 expression = self._parse_string() 1976 extend_props(self._parse_properties()) 1977 else: 1978 expression = self._parse_user_defined_function_expression() 1979 1980 end = self._match_text_seq("END") 1981 1982 if return_: 1983 expression = self.expression(exp.Return, this=expression) 1984 elif create_token.token_type == TokenType.INDEX: 1985 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1986 if not self._match(TokenType.ON): 1987 index = self._parse_id_var() 1988 anonymous = False 1989 else: 1990 index = None 1991 anonymous = True 1992 1993 this = self._parse_index(index=index, anonymous=anonymous) 1994 elif create_token.token_type in self.DB_CREATABLES: 1995 table_parts = self._parse_table_parts( 1996 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1997 ) 1998 1999 # exp.Properties.Location.POST_NAME 2000 self._match(TokenType.COMMA) 2001 extend_props(self._parse_properties(before=True)) 2002 2003 this = self._parse_schema(this=table_parts) 2004 2005 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2006 extend_props(self._parse_properties()) 2007 2008 has_alias = self._match(TokenType.ALIAS) 2009 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2010 # exp.Properties.Location.POST_ALIAS 2011 extend_props(self._parse_properties()) 2012 2013 if create_token.token_type == TokenType.SEQUENCE: 2014 expression = self._parse_types() 2015 extend_props(self._parse_properties()) 2016 else: 2017 expression = self._parse_ddl_select() 2018 2019 # Some dialects also support using a table as an alias instead of a SELECT. 2020 # Here we fallback to this as an alternative. 
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
self._match_texts(("MIN", "MINIMUM")), 2124 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2125 } 2126 2127 if self._match_texts(self.PROPERTY_PARSERS): 2128 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2129 try: 2130 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2131 except TypeError: 2132 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2133 2134 return None 2135 2136 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2137 return self._parse_wrapped_csv(self._parse_property) 2138 2139 def _parse_property(self) -> t.Optional[exp.Expression]: 2140 if self._match_texts(self.PROPERTY_PARSERS): 2141 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2142 2143 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2144 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2145 2146 if self._match_text_seq("COMPOUND", "SORTKEY"): 2147 return self._parse_sortkey(compound=True) 2148 2149 if self._match_text_seq("SQL", "SECURITY"): 2150 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2151 2152 index = self._index 2153 key = self._parse_column() 2154 2155 if not self._match(TokenType.EQ): 2156 self._retreat(index) 2157 return self._parse_sequence_properties() 2158 2159 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2160 if isinstance(key, exp.Column): 2161 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2162 2163 value = self._parse_bitwise() or self._parse_var(any_token=True) 2164 2165 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2166 if isinstance(value, exp.Column): 2167 value = exp.var(value.name) 2168 2169 return self.expression(exp.Property, this=key, value=value) 2170 2171 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2172 if self._match_text_seq("BY"): 2173 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2174 2175 self._match(TokenType.ALIAS) 2176 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2177 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2178 2179 return self.expression( 2180 exp.FileFormatProperty, 2181 this=( 2182 self.expression( 2183 exp.InputOutputFormat, 2184 input_format=input_format, 2185 output_format=output_format, 2186 ) 2187 if input_format or output_format 2188 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2189 ), 2190 ) 2191 2192 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2193 field = self._parse_field() 2194 if isinstance(field, exp.Identifier) and not field.quoted: 2195 field = exp.var(field) 2196 2197 return field 2198 2199 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2200 self._match(TokenType.EQ) 2201 self._match(TokenType.ALIAS) 2202 2203 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2204 2205 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2206 properties = [] 2207 while True: 2208 if before: 2209 prop = self._parse_property_before() 2210 else: 2211 prop = self._parse_property() 2212 if not prop: 2213 break 2214 for p in ensure_list(prop): 2215 properties.append(p) 2216 2217 if properties: 2218 return self.expression(exp.Properties, expressions=properties) 2219 2220 return None 2221 2222 
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        # e.g. (Doris/StarRocks-style, assumed) DISTRIBUTED BY HASH(k1, k2) BUCKETS 10
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
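        # Teradata locking modifier, e.g. LOCKING TABLE t FOR ACCESS (assumed syntax)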
self._match_text_seq("DATABASE"): 2536 kind = "DATABASE" 2537 else: 2538 kind = None 2539 2540 if kind in ("DATABASE", "TABLE", "VIEW"): 2541 this = self._parse_table_parts() 2542 else: 2543 this = None 2544 2545 if self._match(TokenType.FOR): 2546 for_or_in = "FOR" 2547 elif self._match(TokenType.IN): 2548 for_or_in = "IN" 2549 else: 2550 for_or_in = None 2551 2552 if self._match_text_seq("ACCESS"): 2553 lock_type = "ACCESS" 2554 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2555 lock_type = "EXCLUSIVE" 2556 elif self._match_text_seq("SHARE"): 2557 lock_type = "SHARE" 2558 elif self._match_text_seq("READ"): 2559 lock_type = "READ" 2560 elif self._match_text_seq("WRITE"): 2561 lock_type = "WRITE" 2562 elif self._match_text_seq("CHECKSUM"): 2563 lock_type = "CHECKSUM" 2564 else: 2565 lock_type = None 2566 2567 override = self._match_text_seq("OVERRIDE") 2568 2569 return self.expression( 2570 exp.LockingProperty, 2571 this=this, 2572 kind=kind, 2573 for_or_in=for_or_in, 2574 lock_type=lock_type, 2575 override=override, 2576 ) 2577 2578 def _parse_partition_by(self) -> t.List[exp.Expression]: 2579 if self._match(TokenType.PARTITION_BY): 2580 return self._parse_csv(self._parse_assignment) 2581 return [] 2582 2583 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2584 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2585 if self._match_text_seq("MINVALUE"): 2586 return exp.var("MINVALUE") 2587 if self._match_text_seq("MAXVALUE"): 2588 return exp.var("MAXVALUE") 2589 return self._parse_bitwise() 2590 2591 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2592 expression = None 2593 from_expressions = None 2594 to_expressions = None 2595 2596 if self._match(TokenType.IN): 2597 this = self._parse_wrapped_csv(self._parse_bitwise) 2598 elif self._match(TokenType.FROM): 2599 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2600 self._match_text_seq("TO") 2601 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2602 elif self._match_text_seq("WITH", "(", "MODULUS"): 2603 this = self._parse_number() 2604 self._match_text_seq(",", "REMAINDER") 2605 expression = self._parse_number() 2606 self._match_r_paren() 2607 else: 2608 self.raise_error("Failed to parse partition bound spec.") 2609 2610 return self.expression( 2611 exp.PartitionBoundSpec, 2612 this=this, 2613 expression=expression, 2614 from_expressions=from_expressions, 2615 to_expressions=to_expressions, 2616 ) 2617 2618 # https://www.postgresql.org/docs/current/sql-createtable.html 2619 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2620 if not self._match_text_seq("OF"): 2621 self._retreat(self._index - 1) 2622 return None 2623 2624 this = self._parse_table(schema=True) 2625 2626 if self._match(TokenType.DEFAULT): 2627 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2628 elif self._match_text_seq("FOR", "VALUES"): 2629 expression = self._parse_partition_bound_spec() 2630 else: 2631 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2632 2633 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2634 2635 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2636 self._match(TokenType.EQ) 2637 return self.expression( 2638 exp.PartitionedByProperty, 2639 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2640 ) 2641 2642 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2643 if self._match_text_seq("AND", "STATISTICS"): 2644 
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    # e.g. DESCRIBE [EXTENDED | FORMATTED] tbl; some dialects also support
    # describing a statement, e.g. DESCRIBE SELECT ... (assumed examples)
    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
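            # NOTE: the kwargs below are evaluated left to right, so each
            # _match/_parse consumes its clause from the stream only if present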
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2961 kwargs["fields"] = self._parse_string() 2962 if self._match_text_seq("ESCAPED", "BY"): 2963 kwargs["escaped"] = self._parse_string() 2964 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2965 kwargs["collection_items"] = self._parse_string() 2966 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2967 kwargs["map_keys"] = self._parse_string() 2968 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2969 kwargs["lines"] = self._parse_string() 2970 if self._match_text_seq("NULL", "DEFINED", "AS"): 2971 kwargs["null"] = self._parse_string() 2972 2973 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2974 2975 def _parse_load(self) -> exp.LoadData | exp.Command: 2976 if self._match_text_seq("DATA"): 2977 local = self._match_text_seq("LOCAL") 2978 self._match_text_seq("INPATH") 2979 inpath = self._parse_string() 2980 overwrite = self._match(TokenType.OVERWRITE) 2981 self._match_pair(TokenType.INTO, TokenType.TABLE) 2982 2983 return self.expression( 2984 exp.LoadData, 2985 this=self._parse_table(schema=True), 2986 local=local, 2987 overwrite=overwrite, 2988 inpath=inpath, 2989 partition=self._parse_partition(), 2990 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2991 serde=self._match_text_seq("SERDE") and self._parse_string(), 2992 ) 2993 return self._parse_as_command(self._prev) 2994 2995 def _parse_delete(self) -> exp.Delete: 2996 # This handles MySQL's "Multiple-Table Syntax" 2997 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2998 tables = None 2999 if not self._match(TokenType.FROM, advance=False): 3000 tables = self._parse_csv(self._parse_table) or None 3001 3002 returning = self._parse_returning() 3003 3004 return self.expression( 3005 exp.Delete, 3006 tables=tables, 3007 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3008 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3009 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3010 where=self._parse_where(), 3011 returning=returning or self._parse_returning(), 3012 limit=self._parse_limit(), 3013 ) 3014 3015 def _parse_update(self) -> exp.Update: 3016 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3017 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3018 returning = self._parse_returning() 3019 return self.expression( 3020 exp.Update, 3021 **{ # type: ignore 3022 "this": this, 3023 "expressions": expressions, 3024 "from": self._parse_from(joins=True), 3025 "where": self._parse_where(), 3026 "returning": returning or self._parse_returning(), 3027 "order": self._parse_order(), 3028 "limit": self._parse_limit(), 3029 }, 3030 ) 3031 3032 def _parse_use(self) -> exp.Use: 3033 return self.expression( 3034 exp.Use, 3035 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3036 this=self._parse_table(schema=False), 3037 ) 3038 3039 def _parse_uncache(self) -> exp.Uncache: 3040 if not self._match(TokenType.TABLE): 3041 self.raise_error("Expecting TABLE after UNCACHE") 3042 3043 return self.expression( 3044 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3045 ) 3046 3047 def _parse_cache(self) -> exp.Cache: 3048 lazy = self._match_text_seq("LAZY") 3049 self._match(TokenType.TABLE) 3050 table = self._parse_table(schema=True) 3051 3052 options = [] 3053 if self._match_text_seq("OPTIONS"): 3054 self._match_l_paren() 3055 k = 
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table()
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None
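
        # Attach any trailing UNION / INTERSECT / EXCEPT unless the caller
        # deferred set-operation parsing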
        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                    continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
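            # The tokens don't form structured hints; fall back to the raw SQL text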
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
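        # ClickHouse-style ARRAY JOIN can list multiple comma-separated arrays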
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None
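
        # Both branches share the trailing index options, parsed by _parse_index_params
        # (USING, column list, INCLUDE, PARTITION BY, WITH (...), TABLESPACE, WHERE, ON)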
3792 3793 params = self._parse_index_params() 3794 3795 return self.expression( 3796 exp.Index, 3797 this=index, 3798 table=table, 3799 unique=unique, 3800 primary=primary, 3801 amp=amp, 3802 params=params, 3803 ) 3804 3805 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3806 hints: t.List[exp.Expression] = [] 3807 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3808 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3809 hints.append( 3810 self.expression( 3811 exp.WithTableHint, 3812 expressions=self._parse_csv( 3813 lambda: self._parse_function() or self._parse_var(any_token=True) 3814 ), 3815 ) 3816 ) 3817 self._match_r_paren() 3818 else: 3819 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3820 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3821 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3822 3823 self._match_set((TokenType.INDEX, TokenType.KEY)) 3824 if self._match(TokenType.FOR): 3825 hint.set("target", self._advance_any() and self._prev.text.upper()) 3826 3827 hint.set("expressions", self._parse_wrapped_id_vars()) 3828 hints.append(hint) 3829 3830 return hints or None 3831 3832 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3833 return ( 3834 (not schema and self._parse_function(optional_parens=False)) 3835 or self._parse_id_var(any_token=False) 3836 or self._parse_string_as_identifier() 3837 or self._parse_placeholder() 3838 ) 3839 3840 def _parse_table_parts( 3841 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3842 ) -> exp.Table: 3843 catalog = None 3844 db = None 3845 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3846 3847 while self._match(TokenType.DOT): 3848 if catalog: 3849 # This allows nesting the table in arbitrarily many dot expressions if needed 3850 table = self.expression( 3851 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3852 ) 3853 else: 3854 catalog = db 3855 db = table 3856 # "" used for tsql FROM a..b case 3857 table = self._parse_table_part(schema=schema) or "" 3858 3859 if ( 3860 wildcard 3861 and self._is_connected() 3862 and (isinstance(table, exp.Identifier) or not table) 3863 and self._match(TokenType.STAR) 3864 ): 3865 if isinstance(table, exp.Identifier): 3866 table.args["this"] += "*" 3867 else: 3868 table = exp.Identifier(this="*") 3869 3870 # We bubble up comments from the Identifier to the Table 3871 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3872 3873 if is_db_reference: 3874 catalog = db 3875 db = table 3876 table = None 3877 3878 if not table and not is_db_reference: 3879 self.raise_error(f"Expected table name but got {self._curr}") 3880 if not db and is_db_reference: 3881 self.raise_error(f"Expected database name but got {self._curr}") 3882 3883 table = self.expression( 3884 exp.Table, 3885 comments=comments, 3886 this=table, 3887 db=db, 3888 catalog=catalog, 3889 ) 3890 3891 changes = self._parse_changes() 3892 if changes: 3893 table.set("changes", changes) 3894 3895 at_before = self._parse_historical_data() 3896 if at_before: 3897 table.set("when", at_before) 3898 3899 pivots = self._parse_pivots() 3900 if pivots: 3901 table.set("pivots", pivots) 3902 3903 return table 3904 3905 def _parse_table( 3906 self, 3907 schema: bool = False, 3908 joins: bool = False, 3909 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3910 parse_bracket: bool = False, 3911 is_db_reference: 
bool = False, 3912 parse_partition: bool = False, 3913 ) -> t.Optional[exp.Expression]: 3914 lateral = self._parse_lateral() 3915 if lateral: 3916 return lateral 3917 3918 unnest = self._parse_unnest() 3919 if unnest: 3920 return unnest 3921 3922 values = self._parse_derived_table_values() 3923 if values: 3924 return values 3925 3926 subquery = self._parse_select(table=True) 3927 if subquery: 3928 if not subquery.args.get("pivots"): 3929 subquery.set("pivots", self._parse_pivots()) 3930 return subquery 3931 3932 bracket = parse_bracket and self._parse_bracket(None) 3933 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3934 3935 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3936 self._parse_table 3937 ) 3938 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3939 3940 only = self._match(TokenType.ONLY) 3941 3942 this = t.cast( 3943 exp.Expression, 3944 bracket 3945 or rows_from 3946 or self._parse_bracket( 3947 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3948 ), 3949 ) 3950 3951 if only: 3952 this.set("only", only) 3953 3954 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3955 self._match_text_seq("*") 3956 3957 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3958 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3959 this.set("partition", self._parse_partition()) 3960 3961 if schema: 3962 return self._parse_schema(this=this) 3963 3964 version = self._parse_version() 3965 3966 if version: 3967 this.set("version", version) 3968 3969 if self.dialect.ALIAS_POST_TABLESAMPLE: 3970 this.set("sample", self._parse_table_sample()) 3971 3972 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3973 if alias: 3974 this.set("alias", alias) 3975 3976 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3977 return self.expression( 3978 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3979 ) 3980 3981 this.set("hints", self._parse_table_hints()) 3982 3983 if not this.args.get("pivots"): 3984 this.set("pivots", self._parse_pivots()) 3985 3986 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3987 this.set("sample", self._parse_table_sample()) 3988 3989 if joins: 3990 for join in self._parse_joins(): 3991 this.append("joins", join) 3992 3993 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3994 this.set("ordinality", True) 3995 this.set("alias", self._parse_table_alias()) 3996 3997 return this 3998 3999 def _parse_version(self) -> t.Optional[exp.Version]: 4000 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4001 this = "TIMESTAMP" 4002 elif self._match(TokenType.VERSION_SNAPSHOT): 4003 this = "VERSION" 4004 else: 4005 return None 4006 4007 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4008 kind = self._prev.text.upper() 4009 start = self._parse_bitwise() 4010 self._match_texts(("TO", "AND")) 4011 end = self._parse_bitwise() 4012 expression: t.Optional[exp.Expression] = self.expression( 4013 exp.Tuple, expressions=[start, end] 4014 ) 4015 elif self._match_text_seq("CONTAINED", "IN"): 4016 kind = "CONTAINED IN" 4017 expression = self.expression( 4018 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4019 ) 4020 elif self._match(TokenType.ALL): 4021 kind = "ALL" 4022 expression = None 4023 else: 4024 self._match_text_seq("AS", "OF") 4025 kind = "AS OF" 4026 expression = self._parse_type() 4027 4028 return 
self.expression(exp.Version, this=this, expression=expression, kind=kind) 4029 4030 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4031 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4032 index = self._index 4033 historical_data = None 4034 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4035 this = self._prev.text.upper() 4036 kind = ( 4037 self._match(TokenType.L_PAREN) 4038 and self._match_texts(self.HISTORICAL_DATA_KIND) 4039 and self._prev.text.upper() 4040 ) 4041 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4042 4043 if expression: 4044 self._match_r_paren() 4045 historical_data = self.expression( 4046 exp.HistoricalData, this=this, kind=kind, expression=expression 4047 ) 4048 else: 4049 self._retreat(index) 4050 4051 return historical_data 4052 4053 def _parse_changes(self) -> t.Optional[exp.Changes]: 4054 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4055 return None 4056 4057 information = self._parse_var(any_token=True) 4058 self._match_r_paren() 4059 4060 return self.expression( 4061 exp.Changes, 4062 information=information, 4063 at_before=self._parse_historical_data(), 4064 end=self._parse_historical_data(), 4065 ) 4066 4067 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4068 if not self._match(TokenType.UNNEST): 4069 return None 4070 4071 expressions = self._parse_wrapped_csv(self._parse_equality) 4072 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4073 4074 alias = self._parse_table_alias() if with_alias else None 4075 4076 if alias: 4077 if self.dialect.UNNEST_COLUMN_ONLY: 4078 if alias.args.get("columns"): 4079 self.raise_error("Unexpected extra column alias in unnest.") 4080 4081 alias.set("columns", [alias.this]) 4082 alias.set("this", None) 4083 4084 columns = alias.args.get("columns") or [] 4085 if offset and len(expressions) < len(columns): 4086 offset = columns.pop() 4087 4088 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4089 self._match(TokenType.ALIAS) 4090 offset = self._parse_id_var( 4091 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4092 ) or exp.to_identifier("offset") 4093 4094 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4095 4096 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4097 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4098 if not is_derived and not ( 4099 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4100 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4101 ): 4102 return None 4103 4104 expressions = self._parse_csv(self._parse_value) 4105 alias = self._parse_table_alias() 4106 4107 if is_derived: 4108 self._match_r_paren() 4109 4110 return self.expression( 4111 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4112 ) 4113 4114 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4115 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4116 as_modifier and self._match_text_seq("USING", "SAMPLE") 4117 ): 4118 return None 4119 4120 bucket_numerator = None 4121 bucket_denominator = None 4122 bucket_field = None 4123 percent = None 4124 size = None 4125 seed = None 4126 4127 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4128 matched_l_paren = self._match(TokenType.L_PAREN) 4129 4130 if self.TABLESAMPLE_CSV: 4131 num = None 4132 expressions = self._parse_csv(self._parse_primary) 4133 else: 
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4205 return self._parse_alias(this) 4206 4207 return this 4208 4209 this = self._parse_table() 4210 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4211 into = self._parse_unpivot_columns() 4212 using = self._match(TokenType.USING) and self._parse_csv( 4213 lambda: self._parse_alias(self._parse_function()) 4214 ) 4215 group = self._parse_group() 4216 4217 return self.expression( 4218 exp.Pivot, 4219 this=this, 4220 expressions=expressions, 4221 using=using, 4222 group=group, 4223 unpivot=is_unpivot, 4224 into=into, 4225 ) 4226 4227 def _parse_pivot_in(self) -> exp.In: 4228 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4229 this = self._parse_select_or_expression() 4230 4231 self._match(TokenType.ALIAS) 4232 alias = self._parse_bitwise() 4233 if alias: 4234 if isinstance(alias, exp.Column) and not alias.db: 4235 alias = alias.this 4236 return self.expression(exp.PivotAlias, this=this, alias=alias) 4237 4238 return this 4239 4240 value = self._parse_column() 4241 4242 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4243 self.raise_error("Expecting IN (") 4244 4245 if self._match(TokenType.ANY): 4246 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4247 else: 4248 exprs = self._parse_csv(_parse_aliased_expression) 4249 4250 self._match_r_paren() 4251 return self.expression(exp.In, this=value, expressions=exprs) 4252 4253 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4254 index = self._index 4255 include_nulls = None 4256 4257 if self._match(TokenType.PIVOT): 4258 unpivot = False 4259 elif self._match(TokenType.UNPIVOT): 4260 unpivot = True 4261 4262 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4263 if self._match_text_seq("INCLUDE", "NULLS"): 4264 include_nulls = True 4265 elif self._match_text_seq("EXCLUDE", "NULLS"): 4266 include_nulls = False 4267 else: 4268 return None 4269 4270 expressions = [] 4271 4272 if not self._match(TokenType.L_PAREN): 4273 self._retreat(index) 4274 return None 4275 4276 if unpivot: 4277 expressions = self._parse_csv(self._parse_column) 4278 else: 4279 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4280 4281 if not expressions: 4282 self.raise_error("Failed to parse PIVOT's aggregation list") 4283 4284 if not self._match(TokenType.FOR): 4285 self.raise_error("Expecting FOR") 4286 4287 fields = [] 4288 while True: 4289 field = self._try_parse(self._parse_pivot_in) 4290 if not field: 4291 break 4292 fields.append(field) 4293 4294 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4295 self._parse_bitwise 4296 ) 4297 4298 group = self._parse_group() 4299 4300 self._match_r_paren() 4301 4302 pivot = self.expression( 4303 exp.Pivot, 4304 expressions=expressions, 4305 fields=fields, 4306 unpivot=unpivot, 4307 include_nulls=include_nulls, 4308 default_on_null=default_on_null, 4309 group=group, 4310 ) 4311 4312 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4313 pivot.set("alias", self._parse_table_alias()) 4314 4315 if not unpivot: 4316 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4317 4318 columns: t.List[exp.Expression] = [] 4319 all_fields = [] 4320 for pivot_field in pivot.fields: 4321 pivot_field_expressions = pivot_field.expressions 4322 4323 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
4324 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4325 continue 4326 4327 all_fields.append( 4328 [ 4329 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4330 for fld in pivot_field_expressions 4331 ] 4332 ) 4333 4334 if all_fields: 4335 if names: 4336 all_fields.append(names) 4337 4338 # Generate all possible combinations of the pivot columns 4339 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4340 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4341 for fld_parts_tuple in itertools.product(*all_fields): 4342 fld_parts = list(fld_parts_tuple) 4343 4344 if names and self.PREFIXED_PIVOT_COLUMNS: 4345 # Move the "name" to the front of the list 4346 fld_parts.insert(0, fld_parts.pop(-1)) 4347 4348 columns.append(exp.to_identifier("_".join(fld_parts))) 4349 4350 pivot.set("columns", columns) 4351 4352 return pivot 4353 4354 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4355 return [agg.alias for agg in aggregations if agg.alias] 4356 4357 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4358 if not skip_where_token and not self._match(TokenType.PREWHERE): 4359 return None 4360 4361 return self.expression( 4362 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4363 ) 4364 4365 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4366 if not skip_where_token and not self._match(TokenType.WHERE): 4367 return None 4368 4369 return self.expression( 4370 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4371 ) 4372 4373 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4374 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4375 return None 4376 4377 elements: t.Dict[str, t.Any] = defaultdict(list) 4378 4379 if self._match(TokenType.ALL): 4380 elements["all"] = True 4381 elif self._match(TokenType.DISTINCT): 4382 elements["all"] = False 4383 4384 while True: 4385 index = self._index 4386 4387 elements["expressions"].extend( 4388 self._parse_csv( 4389 lambda: None 4390 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4391 else self._parse_assignment() 4392 ) 4393 ) 4394 4395 before_with_index = self._index 4396 with_prefix = self._match(TokenType.WITH) 4397 4398 if self._match(TokenType.ROLLUP): 4399 elements["rollup"].append( 4400 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4401 ) 4402 elif self._match(TokenType.CUBE): 4403 elements["cube"].append( 4404 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4405 ) 4406 elif self._match(TokenType.GROUPING_SETS): 4407 elements["grouping_sets"].append( 4408 self.expression( 4409 exp.GroupingSets, 4410 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4411 ) 4412 ) 4413 elif self._match_text_seq("TOTALS"): 4414 elements["totals"] = True # type: ignore 4415 4416 if before_with_index <= self._index <= before_with_index + 1: 4417 self._retreat(before_with_index) 4418 break 4419 4420 if index == self._index: 4421 break 4422 4423 return self.expression(exp.Group, **elements) # type: ignore 4424 4425 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4426 return self.expression( 4427 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4428 ) 4429 4430 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4431 if 
self._match(TokenType.L_PAREN): 4432 grouping_set = self._parse_csv(self._parse_column) 4433 self._match_r_paren() 4434 return self.expression(exp.Tuple, expressions=grouping_set) 4435 4436 return self._parse_column() 4437 4438 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4439 if not skip_having_token and not self._match(TokenType.HAVING): 4440 return None 4441 return self.expression(exp.Having, this=self._parse_assignment()) 4442 4443 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4444 if not self._match(TokenType.QUALIFY): 4445 return None 4446 return self.expression(exp.Qualify, this=self._parse_assignment()) 4447 4448 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4449 if skip_start_token: 4450 start = None 4451 elif self._match(TokenType.START_WITH): 4452 start = self._parse_assignment() 4453 else: 4454 return None 4455 4456 self._match(TokenType.CONNECT_BY) 4457 nocycle = self._match_text_seq("NOCYCLE") 4458 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4459 exp.Prior, this=self._parse_bitwise() 4460 ) 4461 connect = self._parse_assignment() 4462 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4463 4464 if not start and self._match(TokenType.START_WITH): 4465 start = self._parse_assignment() 4466 4467 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4468 4469 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4470 this = self._parse_id_var(any_token=True) 4471 if self._match(TokenType.ALIAS): 4472 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4473 return this 4474 4475 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4476 if self._match_text_seq("INTERPOLATE"): 4477 return self._parse_wrapped_csv(self._parse_name_as_expression) 4478 return None 4479 4480 def _parse_order( 4481 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4482 ) -> t.Optional[exp.Expression]: 4483 siblings = None 4484 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4485 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4486 return this 4487 4488 siblings = True 4489 4490 return self.expression( 4491 exp.Order, 4492 this=this, 4493 expressions=self._parse_csv(self._parse_ordered), 4494 siblings=siblings, 4495 ) 4496 4497 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4498 if not self._match(token): 4499 return None 4500 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4501 4502 def _parse_ordered( 4503 self, parse_method: t.Optional[t.Callable] = None 4504 ) -> t.Optional[exp.Ordered]: 4505 this = parse_method() if parse_method else self._parse_assignment() 4506 if not this: 4507 return None 4508 4509 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4510 this = exp.var("ALL") 4511 4512 asc = self._match(TokenType.ASC) 4513 desc = self._match(TokenType.DESC) or (asc and False) 4514 4515 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4516 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4517 4518 nulls_first = is_nulls_first or False 4519 explicitly_null_ordered = is_nulls_first or is_nulls_last 4520 4521 if ( 4522 not explicitly_null_ordered 4523 and ( 4524 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4525 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4526 ) 4527 and self.dialect.NULL_ORDERING != "nulls_are_last" 4528 ): 4529 
nulls_first = True 4530 4531 if self._match_text_seq("WITH", "FILL"): 4532 with_fill = self.expression( 4533 exp.WithFill, 4534 **{ # type: ignore 4535 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4536 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4537 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4538 "interpolate": self._parse_interpolate(), 4539 }, 4540 ) 4541 else: 4542 with_fill = None 4543 4544 return self.expression( 4545 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4546 ) 4547 4548 def _parse_limit_options(self) -> exp.LimitOptions: 4549 percent = self._match(TokenType.PERCENT) 4550 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4551 self._match_text_seq("ONLY") 4552 with_ties = self._match_text_seq("WITH", "TIES") 4553 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4554 4555 def _parse_limit( 4556 self, 4557 this: t.Optional[exp.Expression] = None, 4558 top: bool = False, 4559 skip_limit_token: bool = False, 4560 ) -> t.Optional[exp.Expression]: 4561 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4562 comments = self._prev_comments 4563 if top: 4564 limit_paren = self._match(TokenType.L_PAREN) 4565 expression = self._parse_term() if limit_paren else self._parse_number() 4566 4567 if limit_paren: 4568 self._match_r_paren() 4569 4570 limit_options = self._parse_limit_options() 4571 else: 4572 limit_options = None 4573 expression = self._parse_term() 4574 4575 if self._match(TokenType.COMMA): 4576 offset = expression 4577 expression = self._parse_term() 4578 else: 4579 offset = None 4580 4581 limit_exp = self.expression( 4582 exp.Limit, 4583 this=this, 4584 expression=expression, 4585 offset=offset, 4586 comments=comments, 4587 limit_options=limit_options, 4588 expressions=self._parse_limit_by(), 4589 ) 4590 4591 return limit_exp 4592 4593 if self._match(TokenType.FETCH): 4594 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4595 direction = self._prev.text.upper() if direction else "FIRST" 4596 4597 count = self._parse_field(tokens=self.FETCH_TOKENS) 4598 4599 return self.expression( 4600 exp.Fetch, 4601 direction=direction, 4602 count=count, 4603 limit_options=self._parse_limit_options(), 4604 ) 4605 4606 return this 4607 4608 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4609 if not self._match(TokenType.OFFSET): 4610 return this 4611 4612 count = self._parse_term() 4613 self._match_set((TokenType.ROW, TokenType.ROWS)) 4614 4615 return self.expression( 4616 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4617 ) 4618 4619 def _can_parse_limit_or_offset(self) -> bool: 4620 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4621 return False 4622 4623 index = self._index 4624 result = bool( 4625 self._try_parse(self._parse_limit, retreat=True) 4626 or self._try_parse(self._parse_offset, retreat=True) 4627 ) 4628 self._retreat(index) 4629 return result 4630 4631 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4632 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4633 4634 def _parse_locks(self) -> t.List[exp.Lock]: 4635 locks = [] 4636 while True: 4637 if self._match_text_seq("FOR", "UPDATE"): 4638 update = True 4639 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4640 "LOCK", "IN", "SHARE", "MODE" 4641 ): 4642 update = False 4643 else: 4644 break 
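            # [Editor's note: illustrative sketch, not part of the original module.]
            # _parse_limit and _parse_offset above populate the "limit" and
            # "offset" args of the enclosing query. Assuming the public
            # sqlglot.parse_one helper, roughly:
            #
            #   >>> import sqlglot
            #   >>> q = sqlglot.parse_one("SELECT x FROM t LIMIT 5 OFFSET 10")
            #   >>> q.args["limit"].expression.sql()
            #   '5'
            #   >>> q.args["offset"].expression.sql()
            #   '10'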
4645 4646 expressions = None 4647 if self._match_text_seq("OF"): 4648 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4649 4650 wait: t.Optional[bool | exp.Expression] = None 4651 if self._match_text_seq("NOWAIT"): 4652 wait = True 4653 elif self._match_text_seq("WAIT"): 4654 wait = self._parse_primary() 4655 elif self._match_text_seq("SKIP", "LOCKED"): 4656 wait = False 4657 4658 locks.append( 4659 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4660 ) 4661 4662 return locks 4663 4664 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4665 start = self._index 4666 _, side_token, kind_token = self._parse_join_parts() 4667 4668 side = side_token.text if side_token else None 4669 kind = kind_token.text if kind_token else None 4670 4671 if not self._match_set(self.SET_OPERATIONS): 4672 self._retreat(start) 4673 return None 4674 4675 token_type = self._prev.token_type 4676 4677 if token_type == TokenType.UNION: 4678 operation: t.Type[exp.SetOperation] = exp.Union 4679 elif token_type == TokenType.EXCEPT: 4680 operation = exp.Except 4681 else: 4682 operation = exp.Intersect 4683 4684 comments = self._prev.comments 4685 4686 if self._match(TokenType.DISTINCT): 4687 distinct: t.Optional[bool] = True 4688 elif self._match(TokenType.ALL): 4689 distinct = False 4690 else: 4691 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4692 if distinct is None: 4693 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4694 4695 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4696 "STRICT", "CORRESPONDING" 4697 ) 4698 if self._match_text_seq("CORRESPONDING"): 4699 by_name = True 4700 if not side and not kind: 4701 kind = "INNER" 4702 4703 on_column_list = None 4704 if by_name and self._match_texts(("ON", "BY")): 4705 on_column_list = self._parse_wrapped_csv(self._parse_column) 4706 4707 expression = self._parse_select(nested=True, parse_set_operation=False) 4708 4709 return self.expression( 4710 operation, 4711 comments=comments, 4712 this=this, 4713 distinct=distinct, 4714 by_name=by_name, 4715 expression=expression, 4716 side=side, 4717 kind=kind, 4718 on=on_column_list, 4719 ) 4720 4721 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4722 while True: 4723 setop = self.parse_set_operation(this) 4724 if not setop: 4725 break 4726 this = setop 4727 4728 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4729 expression = this.expression 4730 4731 if expression: 4732 for arg in self.SET_OP_MODIFIERS: 4733 expr = expression.args.get(arg) 4734 if expr: 4735 this.set(arg, expr.pop()) 4736 4737 return this 4738 4739 def _parse_expression(self) -> t.Optional[exp.Expression]: 4740 return self._parse_alias(self._parse_assignment()) 4741 4742 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4743 this = self._parse_disjunction() 4744 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4745 # This allows us to parse <non-identifier token> := <expr> 4746 this = exp.column( 4747 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4748 ) 4749 4750 while self._match_set(self.ASSIGNMENT): 4751 if isinstance(this, exp.Column) and len(this.parts) == 1: 4752 this = this.this 4753 4754 this = self.expression( 4755 self.ASSIGNMENT[self._prev.token_type], 4756 this=this, 4757 comments=self._prev_comments, 4758 expression=self._parse_assignment(), 4759 ) 4760 4761 
return this 4762 4763 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4764 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4765 4766 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4767 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4768 4769 def _parse_equality(self) -> t.Optional[exp.Expression]: 4770 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4771 4772 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4773 return self._parse_tokens(self._parse_range, self.COMPARISON) 4774 4775 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4776 this = this or self._parse_bitwise() 4777 negate = self._match(TokenType.NOT) 4778 4779 if self._match_set(self.RANGE_PARSERS): 4780 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4781 if not expression: 4782 return this 4783 4784 this = expression 4785 elif self._match(TokenType.ISNULL): 4786 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4787 4788 # Postgres supports ISNULL and NOTNULL for conditions. 4789 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4790 if self._match(TokenType.NOTNULL): 4791 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4792 this = self.expression(exp.Not, this=this) 4793 4794 if negate: 4795 this = self._negate_range(this) 4796 4797 if self._match(TokenType.IS): 4798 this = self._parse_is(this) 4799 4800 return this 4801 4802 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4803 if not this: 4804 return this 4805 4806 return self.expression(exp.Not, this=this) 4807 4808 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4809 index = self._index - 1 4810 negate = self._match(TokenType.NOT) 4811 4812 if self._match_text_seq("DISTINCT", "FROM"): 4813 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4814 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4815 4816 if self._match(TokenType.JSON): 4817 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4818 4819 if self._match_text_seq("WITH"): 4820 _with = True 4821 elif self._match_text_seq("WITHOUT"): 4822 _with = False 4823 else: 4824 _with = None 4825 4826 unique = self._match(TokenType.UNIQUE) 4827 self._match_text_seq("KEYS") 4828 expression: t.Optional[exp.Expression] = self.expression( 4829 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4830 ) 4831 else: 4832 expression = self._parse_primary() or self._parse_null() 4833 if not expression: 4834 self._retreat(index) 4835 return None 4836 4837 this = self.expression(exp.Is, this=this, expression=expression) 4838 return self.expression(exp.Not, this=this) if negate else this 4839 4840 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4841 unnest = self._parse_unnest(with_alias=False) 4842 if unnest: 4843 this = self.expression(exp.In, this=this, unnest=unnest) 4844 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4845 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4846 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4847 4848 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4849 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4850 else: 4851 this = self.expression(exp.In, this=this, expressions=expressions) 4852 4853 if 
matched_l_paren: 4854 self._match_r_paren(this) 4855 elif not self._match(TokenType.R_BRACKET, expression=this): 4856 self.raise_error("Expecting ]") 4857 else: 4858 this = self.expression(exp.In, this=this, field=self._parse_column()) 4859 4860 return this 4861 4862 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4863 low = self._parse_bitwise() 4864 self._match(TokenType.AND) 4865 high = self._parse_bitwise() 4866 return self.expression(exp.Between, this=this, low=low, high=high) 4867 4868 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4869 if not self._match(TokenType.ESCAPE): 4870 return this 4871 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4872 4873 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4874 index = self._index 4875 4876 if not self._match(TokenType.INTERVAL) and match_interval: 4877 return None 4878 4879 if self._match(TokenType.STRING, advance=False): 4880 this = self._parse_primary() 4881 else: 4882 this = self._parse_term() 4883 4884 if not this or ( 4885 isinstance(this, exp.Column) 4886 and not this.table 4887 and not this.this.quoted 4888 and this.name.upper() == "IS" 4889 ): 4890 self._retreat(index) 4891 return None 4892 4893 unit = self._parse_function() or ( 4894 not self._match(TokenType.ALIAS, advance=False) 4895 and self._parse_var(any_token=True, upper=True) 4896 ) 4897 4898 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4899 # each INTERVAL expression into this canonical form so it's easy to transpile 4900 if this and this.is_number: 4901 this = exp.Literal.string(this.to_py()) 4902 elif this and this.is_string: 4903 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4904 if parts and unit: 4905 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4906 unit = None 4907 self._retreat(self._index - 1) 4908 4909 if len(parts) == 1: 4910 this = exp.Literal.string(parts[0][0]) 4911 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4912 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4913 unit = self.expression( 4914 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4915 ) 4916 4917 interval = self.expression(exp.Interval, this=this, unit=unit) 4918 4919 index = self._index 4920 self._match(TokenType.PLUS) 4921 4922 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4923 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4924 return self.expression( 4925 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4926 ) 4927 4928 self._retreat(index) 4929 return interval 4930 4931 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4932 this = self._parse_term() 4933 4934 while True: 4935 if self._match_set(self.BITWISE): 4936 this = self.expression( 4937 self.BITWISE[self._prev.token_type], 4938 this=this, 4939 expression=self._parse_term(), 4940 ) 4941 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4942 this = self.expression( 4943 exp.DPipe, 4944 this=this, 4945 expression=self._parse_term(), 4946 safe=not self.dialect.STRICT_STRING_CONCAT, 4947 ) 4948 elif self._match(TokenType.DQMARK): 4949 this = self.expression( 4950 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4951 ) 4952 elif self._match_pair(TokenType.LT, TokenType.LT): 4953 this = self.expression( 4954 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4955 ) 4956 elif self._match_pair(TokenType.GT, TokenType.GT): 4957 this = self.expression( 4958 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4959 ) 4960 else: 4961 break 4962 4963 return this 4964 4965 def _parse_term(self) -> t.Optional[exp.Expression]: 4966 this = self._parse_factor() 4967 4968 while self._match_set(self.TERM): 4969 klass = self.TERM[self._prev.token_type] 4970 comments = self._prev_comments 4971 expression = self._parse_factor() 4972 4973 this = self.expression(klass, this=this, comments=comments, expression=expression) 4974 4975 if isinstance(this, exp.Collate): 4976 expr = this.expression 4977 4978 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4979 # fallback to Identifier / Var 4980 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4981 ident = expr.this 4982 if isinstance(ident, exp.Identifier): 4983 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4984 4985 return this 4986 4987 def _parse_factor(self) -> t.Optional[exp.Expression]: 4988 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4989 this = parse_method() 4990 4991 while self._match_set(self.FACTOR): 4992 klass = self.FACTOR[self._prev.token_type] 4993 comments = self._prev_comments 4994 expression = parse_method() 4995 4996 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4997 self._retreat(self._index - 1) 4998 return this 4999 5000 this = self.expression(klass, this=this, comments=comments, expression=expression) 5001 5002 if isinstance(this, exp.Div): 5003 this.args["typed"] = self.dialect.TYPED_DIVISION 5004 this.args["safe"] = self.dialect.SAFE_DIVISION 5005 5006 return this 5007 5008 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5009 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5010 5011 def _parse_unary(self) -> t.Optional[exp.Expression]: 5012 if self._match_set(self.UNARY_PARSERS): 5013 return self.UNARY_PARSERS[self._prev.token_type](self) 5014 return self._parse_at_time_zone(self._parse_type()) 5015 5016 def _parse_type( 5017 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5018 ) -> t.Optional[exp.Expression]: 5019 interval = parse_interval and self._parse_interval() 5020 if interval: 5021 return interval 5022 5023 index = self._index 5024 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5025 
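        # [Editor's note: illustrative sketch, not part of the original module.]
        # The interval canonicalization above (INTERVAL <number> <unit> becoming
        # INTERVAL '<number>' <unit>) can be observed through the public API.
        # Assuming the sqlglot.parse_one helper, roughly:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT INTERVAL 5 day").find(sqlglot.exp.Interval).sql()
        #   "INTERVAL '5' DAY"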
        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
5117 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5118 5119 if type_token == TokenType.OBJECT_IDENTIFIER: 5120 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5121 5122 # https://materialize.com/docs/sql/types/map/ 5123 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5124 key_type = self._parse_types( 5125 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5126 ) 5127 if not self._match(TokenType.FARROW): 5128 self._retreat(index) 5129 return None 5130 5131 value_type = self._parse_types( 5132 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5133 ) 5134 if not self._match(TokenType.R_BRACKET): 5135 self._retreat(index) 5136 return None 5137 5138 return exp.DataType( 5139 this=exp.DataType.Type.MAP, 5140 expressions=[key_type, value_type], 5141 nested=True, 5142 prefix=prefix, 5143 ) 5144 5145 nested = type_token in self.NESTED_TYPE_TOKENS 5146 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5147 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5148 expressions = None 5149 maybe_func = False 5150 5151 if self._match(TokenType.L_PAREN): 5152 if is_struct: 5153 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5154 elif nested: 5155 expressions = self._parse_csv( 5156 lambda: self._parse_types( 5157 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5158 ) 5159 ) 5160 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5161 this = expressions[0] 5162 this.set("nullable", True) 5163 self._match_r_paren() 5164 return this 5165 elif type_token in self.ENUM_TYPE_TOKENS: 5166 expressions = self._parse_csv(self._parse_equality) 5167 elif is_aggregate: 5168 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5169 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5170 ) 5171 if not func_or_ident: 5172 return None 5173 expressions = [func_or_ident] 5174 if self._match(TokenType.COMMA): 5175 expressions.extend( 5176 self._parse_csv( 5177 lambda: self._parse_types( 5178 check_func=check_func, 5179 schema=schema, 5180 allow_identifiers=allow_identifiers, 5181 ) 5182 ) 5183 ) 5184 else: 5185 expressions = self._parse_csv(self._parse_type_size) 5186 5187 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5188 if type_token == TokenType.VECTOR and len(expressions) == 2: 5189 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5190 5191 if not expressions or not self._match(TokenType.R_PAREN): 5192 self._retreat(index) 5193 return None 5194 5195 maybe_func = True 5196 5197 values: t.Optional[t.List[exp.Expression]] = None 5198 5199 if nested and self._match(TokenType.LT): 5200 if is_struct: 5201 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5202 else: 5203 expressions = self._parse_csv( 5204 lambda: self._parse_types( 5205 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5206 ) 5207 ) 5208 5209 if not self._match(TokenType.GT): 5210 self.raise_error("Expecting >") 5211 5212 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5213 values = self._parse_csv(self._parse_assignment) 5214 if not values and is_struct: 5215 values = None 5216 self._retreat(self._index - 1) 5217 else: 5218 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5219 5220 if type_token in self.TIMESTAMPS: 5221 if self._match_text_seq("WITH", "TIME", "ZONE"): 5222 maybe_func = False 5223 tz_type = ( 
5224 exp.DataType.Type.TIMETZ 5225 if type_token in self.TIMES 5226 else exp.DataType.Type.TIMESTAMPTZ 5227 ) 5228 this = exp.DataType(this=tz_type, expressions=expressions) 5229 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5230 maybe_func = False 5231 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5232 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5233 maybe_func = False 5234 elif type_token == TokenType.INTERVAL: 5235 unit = self._parse_var(upper=True) 5236 if unit: 5237 if self._match_text_seq("TO"): 5238 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5239 5240 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5241 else: 5242 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5243 elif type_token == TokenType.VOID: 5244 this = exp.DataType(this=exp.DataType.Type.NULL) 5245 5246 if maybe_func and check_func: 5247 index2 = self._index 5248 peek = self._parse_string() 5249 5250 if not peek: 5251 self._retreat(index) 5252 return None 5253 5254 self._retreat(index2) 5255 5256 if not this: 5257 if self._match_text_seq("UNSIGNED"): 5258 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5259 if not unsigned_type_token: 5260 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5261 5262 type_token = unsigned_type_token or type_token 5263 5264 this = exp.DataType( 5265 this=exp.DataType.Type[type_token.value], 5266 expressions=expressions, 5267 nested=nested, 5268 prefix=prefix, 5269 ) 5270 5271 # Empty arrays/structs are allowed 5272 if values is not None: 5273 cls = exp.Struct if is_struct else exp.Array 5274 this = exp.cast(cls(expressions=values), this, copy=False) 5275 5276 elif expressions: 5277 this.set("expressions", expressions) 5278 5279 # https://materialize.com/docs/sql/types/list/#type-name 5280 while self._match(TokenType.LIST): 5281 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5282 5283 index = self._index 5284 5285 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5286 matched_array = self._match(TokenType.ARRAY) 5287 5288 while self._curr: 5289 datatype_token = self._prev.token_type 5290 matched_l_bracket = self._match(TokenType.L_BRACKET) 5291 5292 if (not matched_l_bracket and not matched_array) or ( 5293 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5294 ): 5295 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5296 # not to be confused with the fixed size array parsing 5297 break 5298 5299 matched_array = False 5300 values = self._parse_csv(self._parse_assignment) or None 5301 if ( 5302 values 5303 and not schema 5304 and ( 5305 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5306 ) 5307 ): 5308 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5309 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5310 self._retreat(index) 5311 break 5312 5313 this = exp.DataType( 5314 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5315 ) 5316 self._match(TokenType.R_BRACKET) 5317 5318 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5319 converter = self.TYPE_CONVERTERS.get(this.this) 5320 if converter: 5321 this = converter(t.cast(exp.DataType, this)) 5322 5323 return this 5324 5325 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5326 index = self._index 5327 5328 if ( 5329 self._curr 5330 and self._next 5331 and self._curr.token_type in self.TYPE_TOKENS 5332 and self._next.token_type in self.TYPE_TOKENS 5333 ): 5334 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5335 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5336 this = self._parse_id_var() 5337 else: 5338 this = ( 5339 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5340 or self._parse_id_var() 5341 ) 5342 5343 self._match(TokenType.COLON) 5344 5345 if ( 5346 type_required 5347 and not isinstance(this, exp.DataType) 5348 and not self._match_set(self.TYPE_TOKENS, advance=False) 5349 ): 5350 self._retreat(index) 5351 return self._parse_types() 5352 5353 return self._parse_column_def(this) 5354 5355 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5356 if not self._match_text_seq("AT", "TIME", "ZONE"): 5357 return this 5358 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5359 5360 def _parse_column(self) -> t.Optional[exp.Expression]: 5361 this = self._parse_column_reference() 5362 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5363 5364 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5365 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5366 5367 return column 5368 5369 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5370 this = self._parse_field() 5371 if ( 5372 not this 5373 and self._match(TokenType.VALUES, advance=False) 5374 and self.VALUES_FOLLOWED_BY_PAREN 5375 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5376 ): 5377 this = self._parse_id_var() 5378 5379 if isinstance(this, exp.Identifier): 5380 # We bubble up comments from the Identifier to the Column 5381 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5382 5383 return this 5384 5385 def _parse_colon_as_variant_extract( 5386 self, this: t.Optional[exp.Expression] 5387 ) -> t.Optional[exp.Expression]: 5388 casts = [] 5389 json_path = [] 5390 escape = None 5391 5392 while self._match(TokenType.COLON): 5393 start_index = self._index 5394 5395 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5396 path = self._parse_column_ops( 5397 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5398 ) 5399 5400 # The cast :: operator has a lower precedence than the extraction operator :, so 5401 # we rearrange the AST appropriately to avoid casting the JSON path 5402 while isinstance(path, exp.Cast): 5403 casts.append(path.to) 5404 path = path.this 5405 5406 if casts: 5407 dcolon_offset = next( 5408 i 5409 for i, t in enumerate(self._tokens[start_index:]) 5410 if t.token_type == TokenType.DCOLON 
5411 ) 5412 end_token = self._tokens[start_index + dcolon_offset - 1] 5413 else: 5414 end_token = self._prev 5415 5416 if path: 5417 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5418 # it'll roundtrip to a string literal in GET_PATH 5419 if isinstance(path, exp.Identifier) and path.quoted: 5420 escape = True 5421 5422 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5423 5424 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5425 # Databricks transforms it back to the colon/dot notation 5426 if json_path: 5427 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5428 5429 if json_path_expr: 5430 json_path_expr.set("escape", escape) 5431 5432 this = self.expression( 5433 exp.JSONExtract, 5434 this=this, 5435 expression=json_path_expr, 5436 variant_extract=True, 5437 ) 5438 5439 while casts: 5440 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5441 5442 return this 5443 5444 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5445 return self._parse_types() 5446 5447 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5448 this = self._parse_bracket(this) 5449 5450 while self._match_set(self.COLUMN_OPERATORS): 5451 op_token = self._prev.token_type 5452 op = self.COLUMN_OPERATORS.get(op_token) 5453 5454 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5455 field = self._parse_dcolon() 5456 if not field: 5457 self.raise_error("Expected type") 5458 elif op and self._curr: 5459 field = self._parse_column_reference() or self._parse_bracket() 5460 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5461 field = self._parse_column_ops(field) 5462 else: 5463 field = self._parse_field(any_token=True, anonymous_func=True) 5464 5465 if isinstance(field, (exp.Func, exp.Window)) and this: 5466 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5467 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5468 this = exp.replace_tree( 5469 this, 5470 lambda n: ( 5471 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5472 if n.table 5473 else n.this 5474 ) 5475 if isinstance(n, exp.Column) 5476 else n, 5477 ) 5478 5479 if op: 5480 this = op(self, this, field) 5481 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5482 this = self.expression( 5483 exp.Column, 5484 comments=this.comments, 5485 this=field, 5486 table=this.this, 5487 db=this.args.get("table"), 5488 catalog=this.args.get("db"), 5489 ) 5490 elif isinstance(field, exp.Window): 5491 # Move the exp.Dot's to the window's function 5492 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5493 field.set("this", window_func) 5494 this = field 5495 else: 5496 this = self.expression(exp.Dot, this=this, expression=field) 5497 5498 if field and field.comments: 5499 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5500 5501 this = self._parse_bracket(this) 5502 5503 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5504 5505 def _parse_primary(self) -> t.Optional[exp.Expression]: 5506 if self._match_set(self.PRIMARY_PARSERS): 5507 token_type = self._prev.token_type 5508 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5509 5510 if token_type == TokenType.STRING: 5511 expressions = [primary] 5512 while self._match(TokenType.STRING): 5513 expressions.append(exp.Literal.string(self._prev.text)) 5514 5515 if len(expressions) > 1: 5516 return self.expression(exp.Concat, expressions=expressions) 5517 5518 return primary 5519 5520 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5521 return exp.Literal.number(f"0.{self._prev.text}") 5522 5523 if self._match(TokenType.L_PAREN): 5524 comments = self._prev_comments 5525 query = self._parse_select() 5526 5527 if query: 5528 expressions = [query] 5529 else: 5530 expressions = self._parse_expressions() 5531 5532 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5533 5534 if not this and self._match(TokenType.R_PAREN, advance=False): 5535 this = self.expression(exp.Tuple) 5536 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5537 this = self._parse_subquery(this=this, parse_alias=False) 5538 elif isinstance(this, exp.Subquery): 5539 this = self._parse_subquery( 5540 this=self._parse_set_operations(this), parse_alias=False 5541 ) 5542 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5543 this = self.expression(exp.Tuple, expressions=expressions) 5544 else: 5545 this = self.expression(exp.Paren, this=this) 5546 5547 if this: 5548 this.add_comments(comments) 5549 5550 self._match_r_paren(expression=this) 5551 return this 5552 5553 return None 5554 5555 def _parse_field( 5556 self, 5557 any_token: bool = False, 5558 tokens: t.Optional[t.Collection[TokenType]] = None, 5559 anonymous_func: bool = False, 5560 ) -> t.Optional[exp.Expression]: 5561 if anonymous_func: 5562 field = ( 5563 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5564 or self._parse_primary() 5565 ) 5566 else: 5567 field = self._parse_primary() or self._parse_function( 5568 anonymous=anonymous_func, any_token=any_token 5569 ) 5570 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5571 5572 def _parse_function( 5573 self, 5574 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5575 anonymous: bool = False, 5576 optional_parens: 
bool = True, 5577 any_token: bool = False, 5578 ) -> t.Optional[exp.Expression]: 5579 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5580 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5581 fn_syntax = False 5582 if ( 5583 self._match(TokenType.L_BRACE, advance=False) 5584 and self._next 5585 and self._next.text.upper() == "FN" 5586 ): 5587 self._advance(2) 5588 fn_syntax = True 5589 5590 func = self._parse_function_call( 5591 functions=functions, 5592 anonymous=anonymous, 5593 optional_parens=optional_parens, 5594 any_token=any_token, 5595 ) 5596 5597 if fn_syntax: 5598 self._match(TokenType.R_BRACE) 5599 5600 return func 5601 5602 def _parse_function_call( 5603 self, 5604 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5605 anonymous: bool = False, 5606 optional_parens: bool = True, 5607 any_token: bool = False, 5608 ) -> t.Optional[exp.Expression]: 5609 if not self._curr: 5610 return None 5611 5612 comments = self._curr.comments 5613 token_type = self._curr.token_type 5614 this = self._curr.text 5615 upper = this.upper() 5616 5617 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5618 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5619 self._advance() 5620 return self._parse_window(parser(self)) 5621 5622 if not self._next or self._next.token_type != TokenType.L_PAREN: 5623 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5624 self._advance() 5625 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5626 5627 return None 5628 5629 if any_token: 5630 if token_type in self.RESERVED_TOKENS: 5631 return None 5632 elif token_type not in self.FUNC_TOKENS: 5633 return None 5634 5635 self._advance(2) 5636 5637 parser = self.FUNCTION_PARSERS.get(upper) 5638 if parser and not anonymous: 5639 this = parser(self) 5640 else: 5641 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5642 5643 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5644 this = self.expression( 5645 subquery_predicate, comments=comments, this=self._parse_select() 5646 ) 5647 self._match_r_paren() 5648 return this 5649 5650 if functions is None: 5651 functions = self.FUNCTIONS 5652 5653 function = functions.get(upper) 5654 known_function = function and not anonymous 5655 5656 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5657 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5658 5659 post_func_comments = self._curr and self._curr.comments 5660 if known_function and post_func_comments: 5661 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5662 # call we'll construct it as exp.Anonymous, even if it's "known" 5663 if any( 5664 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5665 for comment in post_func_comments 5666 ): 5667 known_function = False 5668 5669 if alias and known_function: 5670 args = self._kv_to_prop_eq(args) 5671 5672 if known_function: 5673 func_builder = t.cast(t.Callable, function) 5674 5675 if "dialect" in func_builder.__code__.co_varnames: 5676 func = func_builder(args, dialect=self.dialect) 5677 else: 5678 func = func_builder(args) 5679 5680 func = self.validate_expression(func, args) 5681 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5682 func.meta["name"] = this 5683 5684 this = func 5685 else: 5686 if token_type == TokenType.IDENTIFIER: 5687 this = exp.Identifier(this=this, quoted=True) 5688 this = self.expression(exp.Anonymous, this=this, expressions=args) 5689 
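        # A minimal usage sketch (assumes the top-level sqlglot API; `my_udf` is a
        # hypothetical name): a call with no registered builder surfaces as
        # exp.Anonymous rather than a typed function node.
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> node = sqlglot.parse_one("SELECT my_udf(1, 2)").selects[0]
        #   >>> isinstance(node, exp.Anonymous)
        #   True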
5690 if isinstance(this, exp.Expression): 5691 this.add_comments(comments) 5692 5693 self._match_r_paren(this) 5694 return self._parse_window(this) 5695 5696 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5697 return expression 5698 5699 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5700 transformed = [] 5701 5702 for index, e in enumerate(expressions): 5703 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5704 if isinstance(e, exp.Alias): 5705 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5706 5707 if not isinstance(e, exp.PropertyEQ): 5708 e = self.expression( 5709 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5710 ) 5711 5712 if isinstance(e.this, exp.Column): 5713 e.this.replace(e.this.this) 5714 else: 5715 e = self._to_prop_eq(e, index) 5716 5717 transformed.append(e) 5718 5719 return transformed 5720 5721 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5722 return self._parse_statement() 5723 5724 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5725 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5726 5727 def _parse_user_defined_function( 5728 self, kind: t.Optional[TokenType] = None 5729 ) -> t.Optional[exp.Expression]: 5730 this = self._parse_table_parts(schema=True) 5731 5732 if not self._match(TokenType.L_PAREN): 5733 return this 5734 5735 expressions = self._parse_csv(self._parse_function_parameter) 5736 self._match_r_paren() 5737 return self.expression( 5738 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5739 ) 5740 5741 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5742 literal = self._parse_primary() 5743 if literal: 5744 return self.expression(exp.Introducer, this=token.text, expression=literal) 5745 5746 return self.expression(exp.Identifier, this=token.text) 5747 5748 def _parse_session_parameter(self) -> exp.SessionParameter: 5749 kind = None 5750 this = self._parse_id_var() or self._parse_primary() 5751 5752 if this and self._match(TokenType.DOT): 5753 kind = this.name 5754 this = self._parse_var() or self._parse_primary() 5755 5756 return self.expression(exp.SessionParameter, this=this, kind=kind) 5757 5758 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5759 return self._parse_id_var() 5760 5761 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5762 index = self._index 5763 5764 if self._match(TokenType.L_PAREN): 5765 expressions = t.cast( 5766 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5767 ) 5768 5769 if not self._match(TokenType.R_PAREN): 5770 self._retreat(index) 5771 else: 5772 expressions = [self._parse_lambda_arg()] 5773 5774 if self._match_set(self.LAMBDAS): 5775 return self.LAMBDAS[self._prev.token_type](self, expressions) 5776 5777 self._retreat(index) 5778 5779 this: t.Optional[exp.Expression] 5780 5781 if self._match(TokenType.DISTINCT): 5782 this = self.expression( 5783 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5784 ) 5785 else: 5786 this = self._parse_select_or_expression(alias=alias) 5787 5788 return self._parse_limit( 5789 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5790 ) 5791 5792 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5793 index = self._index 5794 if not 
self._match(TokenType.L_PAREN): 5795 return this 5796 5797 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5798 # expr can be of both types 5799 if self._match_set(self.SELECT_START_TOKENS): 5800 self._retreat(index) 5801 return this 5802 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5803 self._match_r_paren() 5804 return self.expression(exp.Schema, this=this, expressions=args) 5805 5806 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5807 return self._parse_column_def(self._parse_field(any_token=True)) 5808 5809 def _parse_column_def( 5810 self, this: t.Optional[exp.Expression], computed_column: bool = True 5811 ) -> t.Optional[exp.Expression]: 5812 # column defs are not really columns, they're identifiers 5813 if isinstance(this, exp.Column): 5814 this = this.this 5815 5816 if not computed_column: 5817 self._match(TokenType.ALIAS) 5818 5819 kind = self._parse_types(schema=True) 5820 5821 if self._match_text_seq("FOR", "ORDINALITY"): 5822 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5823 5824 constraints: t.List[exp.Expression] = [] 5825 5826 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5827 ("ALIAS", "MATERIALIZED") 5828 ): 5829 persisted = self._prev.text.upper() == "MATERIALIZED" 5830 constraint_kind = exp.ComputedColumnConstraint( 5831 this=self._parse_assignment(), 5832 persisted=persisted or self._match_text_seq("PERSISTED"), 5833 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5834 ) 5835 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5836 elif ( 5837 kind 5838 and self._match(TokenType.ALIAS, advance=False) 5839 and ( 5840 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5841 or (self._next and self._next.token_type == TokenType.L_PAREN) 5842 ) 5843 ): 5844 self._advance() 5845 constraints.append( 5846 self.expression( 5847 exp.ColumnConstraint, 5848 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5849 ) 5850 ) 5851 5852 while True: 5853 constraint = self._parse_column_constraint() 5854 if not constraint: 5855 break 5856 constraints.append(constraint) 5857 5858 if not kind and not constraints: 5859 return this 5860 5861 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5862 5863 def _parse_auto_increment( 5864 self, 5865 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5866 start = None 5867 increment = None 5868 5869 if self._match(TokenType.L_PAREN, advance=False): 5870 args = self._parse_wrapped_csv(self._parse_bitwise) 5871 start = seq_get(args, 0) 5872 increment = seq_get(args, 1) 5873 elif self._match_text_seq("START"): 5874 start = self._parse_bitwise() 5875 self._match_text_seq("INCREMENT") 5876 increment = self._parse_bitwise() 5877 5878 if start and increment: 5879 return exp.GeneratedAsIdentityColumnConstraint( 5880 start=start, increment=increment, this=False 5881 ) 5882 5883 return exp.AutoIncrementColumnConstraint() 5884 5885 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5886 if not self._match_text_seq("REFRESH"): 5887 self._retreat(self._index - 1) 5888 return None 5889 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5890 5891 def _parse_compress(self) -> exp.CompressColumnConstraint: 5892 if self._match(TokenType.L_PAREN, advance=False): 5893 return self.expression( 5894 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5895 
) 5896 5897 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5898 5899 def _parse_generated_as_identity( 5900 self, 5901 ) -> ( 5902 exp.GeneratedAsIdentityColumnConstraint 5903 | exp.ComputedColumnConstraint 5904 | exp.GeneratedAsRowColumnConstraint 5905 ): 5906 if self._match_text_seq("BY", "DEFAULT"): 5907 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5908 this = self.expression( 5909 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5910 ) 5911 else: 5912 self._match_text_seq("ALWAYS") 5913 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5914 5915 self._match(TokenType.ALIAS) 5916 5917 if self._match_text_seq("ROW"): 5918 start = self._match_text_seq("START") 5919 if not start: 5920 self._match(TokenType.END) 5921 hidden = self._match_text_seq("HIDDEN") 5922 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5923 5924 identity = self._match_text_seq("IDENTITY") 5925 5926 if self._match(TokenType.L_PAREN): 5927 if self._match(TokenType.START_WITH): 5928 this.set("start", self._parse_bitwise()) 5929 if self._match_text_seq("INCREMENT", "BY"): 5930 this.set("increment", self._parse_bitwise()) 5931 if self._match_text_seq("MINVALUE"): 5932 this.set("minvalue", self._parse_bitwise()) 5933 if self._match_text_seq("MAXVALUE"): 5934 this.set("maxvalue", self._parse_bitwise()) 5935 5936 if self._match_text_seq("CYCLE"): 5937 this.set("cycle", True) 5938 elif self._match_text_seq("NO", "CYCLE"): 5939 this.set("cycle", False) 5940 5941 if not identity: 5942 this.set("expression", self._parse_range()) 5943 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5944 args = self._parse_csv(self._parse_bitwise) 5945 this.set("start", seq_get(args, 0)) 5946 this.set("increment", seq_get(args, 1)) 5947 5948 self._match_r_paren() 5949 5950 return this 5951 5952 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5953 self._match_text_seq("LENGTH") 5954 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5955 5956 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5957 if self._match_text_seq("NULL"): 5958 return self.expression(exp.NotNullColumnConstraint) 5959 if self._match_text_seq("CASESPECIFIC"): 5960 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5961 if self._match_text_seq("FOR", "REPLICATION"): 5962 return self.expression(exp.NotForReplicationColumnConstraint) 5963 5964 # Unconsume the `NOT` token 5965 self._retreat(self._index - 1) 5966 return None 5967 5968 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5969 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5970 5971 procedure_option_follows = ( 5972 self._match(TokenType.WITH, advance=False) 5973 and self._next 5974 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5975 ) 5976 5977 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5978 return self.expression( 5979 exp.ColumnConstraint, 5980 this=this, 5981 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5982 ) 5983 5984 return this 5985 5986 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5987 if not self._match(TokenType.CONSTRAINT): 5988 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5989 5990 return self.expression( 5991 exp.Constraint, 5992 this=self._parse_id_var(), 5993 expressions=self._parse_unnamed_constraints(), 5994 ) 5995 5996 def 
_parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5997 constraints = [] 5998 while True: 5999 constraint = self._parse_unnamed_constraint() or self._parse_function() 6000 if not constraint: 6001 break 6002 constraints.append(constraint) 6003 6004 return constraints 6005 6006 def _parse_unnamed_constraint( 6007 self, constraints: t.Optional[t.Collection[str]] = None 6008 ) -> t.Optional[exp.Expression]: 6009 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6010 constraints or self.CONSTRAINT_PARSERS 6011 ): 6012 return None 6013 6014 constraint = self._prev.text.upper() 6015 if constraint not in self.CONSTRAINT_PARSERS: 6016 self.raise_error(f"No parser found for schema constraint {constraint}.") 6017 6018 return self.CONSTRAINT_PARSERS[constraint](self) 6019 6020 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6021 return self._parse_id_var(any_token=False) 6022 6023 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6024 self._match_text_seq("KEY") 6025 return self.expression( 6026 exp.UniqueColumnConstraint, 6027 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6028 this=self._parse_schema(self._parse_unique_key()), 6029 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6030 on_conflict=self._parse_on_conflict(), 6031 options=self._parse_key_constraint_options(), 6032 ) 6033 6034 def _parse_key_constraint_options(self) -> t.List[str]: 6035 options = [] 6036 while True: 6037 if not self._curr: 6038 break 6039 6040 if self._match(TokenType.ON): 6041 action = None 6042 on = self._advance_any() and self._prev.text 6043 6044 if self._match_text_seq("NO", "ACTION"): 6045 action = "NO ACTION" 6046 elif self._match_text_seq("CASCADE"): 6047 action = "CASCADE" 6048 elif self._match_text_seq("RESTRICT"): 6049 action = "RESTRICT" 6050 elif self._match_pair(TokenType.SET, TokenType.NULL): 6051 action = "SET NULL" 6052 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6053 action = "SET DEFAULT" 6054 else: 6055 self.raise_error("Invalid key constraint") 6056 6057 options.append(f"ON {on} {action}") 6058 else: 6059 var = self._parse_var_from_options( 6060 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6061 ) 6062 if not var: 6063 break 6064 options.append(var.name) 6065 6066 return options 6067 6068 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6069 if match and not self._match(TokenType.REFERENCES): 6070 return None 6071 6072 expressions = None 6073 this = self._parse_table(schema=True) 6074 options = self._parse_key_constraint_options() 6075 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6076 6077 def _parse_foreign_key(self) -> exp.ForeignKey: 6078 expressions = self._parse_wrapped_id_vars() 6079 reference = self._parse_references() 6080 on_options = {} 6081 6082 while self._match(TokenType.ON): 6083 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6084 self.raise_error("Expected DELETE or UPDATE") 6085 6086 kind = self._prev.text.lower() 6087 6088 if self._match_text_seq("NO", "ACTION"): 6089 action = "NO ACTION" 6090 elif self._match(TokenType.SET): 6091 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6092 action = "SET " + self._prev.text.upper() 6093 else: 6094 self._advance() 6095 action = self._prev.text.upper() 6096 6097 on_options[kind] = action 6098 6099 return self.expression( 6100 exp.ForeignKey, 6101 expressions=expressions, 6102 reference=reference, 6103 
options=self._parse_key_constraint_options(), 6104 **on_options, # type: ignore 6105 ) 6106 6107 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6108 return self._parse_ordered() or self._parse_field() 6109 6110 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6111 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6112 self._retreat(self._index - 1) 6113 return None 6114 6115 id_vars = self._parse_wrapped_id_vars() 6116 return self.expression( 6117 exp.PeriodForSystemTimeConstraint, 6118 this=seq_get(id_vars, 0), 6119 expression=seq_get(id_vars, 1), 6120 ) 6121 6122 def _parse_primary_key( 6123 self, wrapped_optional: bool = False, in_props: bool = False 6124 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6125 desc = ( 6126 self._match_set((TokenType.ASC, TokenType.DESC)) 6127 and self._prev.token_type == TokenType.DESC 6128 ) 6129 6130 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6131 return self.expression( 6132 exp.PrimaryKeyColumnConstraint, 6133 desc=desc, 6134 options=self._parse_key_constraint_options(), 6135 ) 6136 6137 expressions = self._parse_wrapped_csv( 6138 self._parse_primary_key_part, optional=wrapped_optional 6139 ) 6140 options = self._parse_key_constraint_options() 6141 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6142 6143 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6144 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6145 6146 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6147 """ 6148 Parses a datetime column in ODBC format. We parse the column into the corresponding 6149 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6150 same as we did for `DATE('yyyy-mm-dd')`. 
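        The other ODBC markers are treated analogously; for instance (an
        illustrative note, based on ODBC_DATETIME_LITERALS), `{t'hh:mm:ss'}` maps
        to a `Time` and `{ts'yyyy-mm-dd hh:mm:ss'}` to a `Timestamp`.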
6151 6152 Reference: 6153 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6154 """ 6155 self._match(TokenType.VAR) 6156 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6157 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6158 if not self._match(TokenType.R_BRACE): 6159 self.raise_error("Expected }") 6160 return expression 6161 6162 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6163 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6164 return this 6165 6166 bracket_kind = self._prev.token_type 6167 if ( 6168 bracket_kind == TokenType.L_BRACE 6169 and self._curr 6170 and self._curr.token_type == TokenType.VAR 6171 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6172 ): 6173 return self._parse_odbc_datetime_literal() 6174 6175 expressions = self._parse_csv( 6176 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6177 ) 6178 6179 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6180 self.raise_error("Expected ]") 6181 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6182 self.raise_error("Expected }") 6183 6184 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6185 if bracket_kind == TokenType.L_BRACE: 6186 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6187 elif not this: 6188 this = build_array_constructor( 6189 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6190 ) 6191 else: 6192 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6193 if constructor_type: 6194 return build_array_constructor( 6195 constructor_type, 6196 args=expressions, 6197 bracket_kind=bracket_kind, 6198 dialect=self.dialect, 6199 ) 6200 6201 expressions = apply_index_offset( 6202 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6203 ) 6204 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6205 6206 self._add_comments(this) 6207 return self._parse_bracket(this) 6208 6209 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6210 if self._match(TokenType.COLON): 6211 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6212 return this 6213 6214 def _parse_case(self) -> t.Optional[exp.Expression]: 6215 ifs = [] 6216 default = None 6217 6218 comments = self._prev_comments 6219 expression = self._parse_assignment() 6220 6221 while self._match(TokenType.WHEN): 6222 this = self._parse_assignment() 6223 self._match(TokenType.THEN) 6224 then = self._parse_assignment() 6225 ifs.append(self.expression(exp.If, this=this, true=then)) 6226 6227 if self._match(TokenType.ELSE): 6228 default = self._parse_assignment() 6229 6230 if not self._match(TokenType.END): 6231 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6232 default = exp.column("interval") 6233 else: 6234 self.raise_error("Expected END after CASE", self._prev) 6235 6236 return self.expression( 6237 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6238 ) 6239 6240 def _parse_if(self) -> t.Optional[exp.Expression]: 6241 if self._match(TokenType.L_PAREN): 6242 args = self._parse_csv( 6243 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6244 ) 6245 this = self.validate_expression(exp.If.from_arg_list(args), args) 6246 self._match_r_paren() 6247 
else: 6248 index = self._index - 1 6249 6250 if self.NO_PAREN_IF_COMMANDS and index == 0: 6251 return self._parse_as_command(self._prev) 6252 6253 condition = self._parse_assignment() 6254 6255 if not condition: 6256 self._retreat(index) 6257 return None 6258 6259 self._match(TokenType.THEN) 6260 true = self._parse_assignment() 6261 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6262 self._match(TokenType.END) 6263 this = self.expression(exp.If, this=condition, true=true, false=false) 6264 6265 return this 6266 6267 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6268 if not self._match_text_seq("VALUE", "FOR"): 6269 self._retreat(self._index - 1) 6270 return None 6271 6272 return self.expression( 6273 exp.NextValueFor, 6274 this=self._parse_column(), 6275 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6276 ) 6277 6278 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6279 this = self._parse_function() or self._parse_var_or_string(upper=True) 6280 6281 if self._match(TokenType.FROM): 6282 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6283 6284 if not self._match(TokenType.COMMA): 6285 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6286 6287 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6288 6289 def _parse_gap_fill(self) -> exp.GapFill: 6290 self._match(TokenType.TABLE) 6291 this = self._parse_table() 6292 6293 self._match(TokenType.COMMA) 6294 args = [this, *self._parse_csv(self._parse_lambda)] 6295 6296 gap_fill = exp.GapFill.from_arg_list(args) 6297 return self.validate_expression(gap_fill, args) 6298 6299 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6300 this = self._parse_assignment() 6301 6302 if not self._match(TokenType.ALIAS): 6303 if self._match(TokenType.COMMA): 6304 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6305 6306 self.raise_error("Expected AS after CAST") 6307 6308 fmt = None 6309 to = self._parse_types() 6310 6311 default = self._match(TokenType.DEFAULT) 6312 if default: 6313 default = self._parse_bitwise() 6314 self._match_text_seq("ON", "CONVERSION", "ERROR") 6315 6316 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6317 fmt_string = self._parse_string() 6318 fmt = self._parse_at_time_zone(fmt_string) 6319 6320 if not to: 6321 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6322 if to.this in exp.DataType.TEMPORAL_TYPES: 6323 this = self.expression( 6324 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6325 this=this, 6326 format=exp.Literal.string( 6327 format_time( 6328 fmt_string.this if fmt_string else "", 6329 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6330 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6331 ) 6332 ), 6333 safe=safe, 6334 ) 6335 6336 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6337 this.set("zone", fmt.args["zone"]) 6338 return this 6339 elif not to: 6340 self.raise_error("Expected TYPE after CAST") 6341 elif isinstance(to, exp.Identifier): 6342 to = exp.DataType.build(to.name, udt=True) 6343 elif to.this == exp.DataType.Type.CHAR: 6344 if self._match(TokenType.CHARACTER_SET): 6345 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6346 6347 return self.expression( 6348 exp.Cast if strict else exp.TryCast, 6349 this=this, 6350 to=to, 6351 format=fmt, 6352 safe=safe, 6353 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6354 default=default, 6355 ) 6356 6357 def _parse_string_agg(self) -> exp.GroupConcat: 6358 if self._match(TokenType.DISTINCT): 6359 args: t.List[t.Optional[exp.Expression]] = [ 6360 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6361 ] 6362 if self._match(TokenType.COMMA): 6363 args.extend(self._parse_csv(self._parse_assignment)) 6364 else: 6365 args = self._parse_csv(self._parse_assignment) # type: ignore 6366 6367 if self._match_text_seq("ON", "OVERFLOW"): 6368 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6369 if self._match_text_seq("ERROR"): 6370 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6371 else: 6372 self._match_text_seq("TRUNCATE") 6373 on_overflow = self.expression( 6374 exp.OverflowTruncateBehavior, 6375 this=self._parse_string(), 6376 with_count=( 6377 self._match_text_seq("WITH", "COUNT") 6378 or not self._match_text_seq("WITHOUT", "COUNT") 6379 ), 6380 ) 6381 else: 6382 on_overflow = None 6383 6384 index = self._index 6385 if not self._match(TokenType.R_PAREN) and args: 6386 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6387 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6388 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6389 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6390 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6391 6392 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6393 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6394 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
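        # A minimal sketch of the two shapes this method reconciles: both of the
        # following parse into the same exp.GroupConcat node (separator and order
        # preserved), which is what makes cross-dialect transpilation possible:
        #
        #   STRING_AGG(x, ',' ORDER BY y)                -- Postgres / BigQuery style
        #   LISTAGG(x, ',') WITHIN GROUP (ORDER BY y)    -- Trino / Oracle style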
6395 if not self._match_text_seq("WITHIN", "GROUP"): 6396 self._retreat(index) 6397 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6398 6399 # The corresponding match_r_paren will be called in parse_function (caller) 6400 self._match_l_paren() 6401 6402 return self.expression( 6403 exp.GroupConcat, 6404 this=self._parse_order(this=seq_get(args, 0)), 6405 separator=seq_get(args, 1), 6406 on_overflow=on_overflow, 6407 ) 6408 6409 def _parse_convert( 6410 self, strict: bool, safe: t.Optional[bool] = None 6411 ) -> t.Optional[exp.Expression]: 6412 this = self._parse_bitwise() 6413 6414 if self._match(TokenType.USING): 6415 to: t.Optional[exp.Expression] = self.expression( 6416 exp.CharacterSet, this=self._parse_var() 6417 ) 6418 elif self._match(TokenType.COMMA): 6419 to = self._parse_types() 6420 else: 6421 to = None 6422 6423 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6424 6425 def _parse_xml_table(self) -> exp.XMLTable: 6426 namespaces = None 6427 passing = None 6428 columns = None 6429 6430 if self._match_text_seq("XMLNAMESPACES", "("): 6431 namespaces = self._parse_xml_namespace() 6432 self._match_text_seq(")", ",") 6433 6434 this = self._parse_string() 6435 6436 if self._match_text_seq("PASSING"): 6437 # The BY VALUE keywords are optional and are provided for semantic clarity 6438 self._match_text_seq("BY", "VALUE") 6439 passing = self._parse_csv(self._parse_column) 6440 6441 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6442 6443 if self._match_text_seq("COLUMNS"): 6444 columns = self._parse_csv(self._parse_field_def) 6445 6446 return self.expression( 6447 exp.XMLTable, 6448 this=this, 6449 namespaces=namespaces, 6450 passing=passing, 6451 columns=columns, 6452 by_ref=by_ref, 6453 ) 6454 6455 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6456 namespaces = [] 6457 6458 while True: 6459 if self._match(TokenType.DEFAULT): 6460 uri = self._parse_string() 6461 else: 6462 uri = self._parse_alias(self._parse_string()) 6463 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6464 if not self._match(TokenType.COMMA): 6465 break 6466 6467 return namespaces 6468 6469 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6470 """ 6471 There are generally two variants of the DECODE function: 6472 6473 - DECODE(bin, charset) 6474 - DECODE(expression, search, result [, search, result] ... [, default]) 6475 6476 The second variant will always be parsed into a CASE expression. Note that NULL 6477 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6478 instead of relying on pattern matching. 
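        For example, the tree built below turns

            DECODE(x, 1, 'one', NULL, 'none', 'other')

        into (roughly):

            CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END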
6479 """ 6480 args = self._parse_csv(self._parse_assignment) 6481 6482 if len(args) < 3: 6483 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6484 6485 expression, *expressions = args 6486 if not expression: 6487 return None 6488 6489 ifs = [] 6490 for search, result in zip(expressions[::2], expressions[1::2]): 6491 if not search or not result: 6492 return None 6493 6494 if isinstance(search, exp.Literal): 6495 ifs.append( 6496 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6497 ) 6498 elif isinstance(search, exp.Null): 6499 ifs.append( 6500 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6501 ) 6502 else: 6503 cond = exp.or_( 6504 exp.EQ(this=expression.copy(), expression=search), 6505 exp.and_( 6506 exp.Is(this=expression.copy(), expression=exp.Null()), 6507 exp.Is(this=search.copy(), expression=exp.Null()), 6508 copy=False, 6509 ), 6510 copy=False, 6511 ) 6512 ifs.append(exp.If(this=cond, true=result)) 6513 6514 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6515 6516 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6517 self._match_text_seq("KEY") 6518 key = self._parse_column() 6519 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6520 self._match_text_seq("VALUE") 6521 value = self._parse_bitwise() 6522 6523 if not key and not value: 6524 return None 6525 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6526 6527 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6528 if not this or not self._match_text_seq("FORMAT", "JSON"): 6529 return this 6530 6531 return self.expression(exp.FormatJson, this=this) 6532 6533 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6534 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6535 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6536 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6537 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6538 else: 6539 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6540 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6541 6542 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6543 6544 if not empty and not error and not null: 6545 return None 6546 6547 return self.expression( 6548 exp.OnCondition, 6549 empty=empty, 6550 error=error, 6551 null=null, 6552 ) 6553 6554 def _parse_on_handling( 6555 self, on: str, *values: str 6556 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6557 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6558 for value in values: 6559 if self._match_text_seq(value, "ON", on): 6560 return f"{value} ON {on}" 6561 6562 index = self._index 6563 if self._match(TokenType.DEFAULT): 6564 default_value = self._parse_bitwise() 6565 if self._match_text_seq("ON", on): 6566 return default_value 6567 6568 self._retreat(index) 6569 6570 return None 6571 6572 @t.overload 6573 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6574 6575 @t.overload 6576 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6577 6578 def _parse_json_object(self, agg=False): 6579 star = self._parse_star() 6580 expressions = ( 6581 [star] 6582 if star 6583 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6584 ) 6585 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6586 6587 unique_keys = None 6588 if self._match_text_seq("WITH", "UNIQUE"): 6589 unique_keys = True 6590 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6591 unique_keys = False 6592 6593 self._match_text_seq("KEYS") 6594 6595 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6596 self._parse_type() 6597 ) 6598 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6599 6600 return self.expression( 6601 exp.JSONObjectAgg if agg else exp.JSONObject, 6602 expressions=expressions, 6603 null_handling=null_handling, 6604 unique_keys=unique_keys, 6605 return_type=return_type, 6606 encoding=encoding, 6607 ) 6608 6609 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6610 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6611 if not self._match_text_seq("NESTED"): 6612 this = self._parse_id_var() 6613 kind = self._parse_types(allow_identifiers=False) 6614 nested = None 6615 else: 6616 this = None 6617 kind = None 6618 nested = True 6619 6620 path = self._match_text_seq("PATH") and self._parse_string() 6621 nested_schema = nested and self._parse_json_schema() 6622 6623 return self.expression( 6624 exp.JSONColumnDef, 6625 this=this, 6626 kind=kind, 6627 path=path, 6628 nested_schema=nested_schema, 6629 ) 6630 6631 def _parse_json_schema(self) -> exp.JSONSchema: 6632 self._match_text_seq("COLUMNS") 6633 return self.expression( 6634 exp.JSONSchema, 6635 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6636 ) 6637 6638 def _parse_json_table(self) -> exp.JSONTable: 6639 this = self._parse_format_json(self._parse_bitwise()) 6640 path = self._match(TokenType.COMMA) and self._parse_string() 6641 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6642 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6643 schema = self._parse_json_schema() 6644 6645 return exp.JSONTable( 6646 this=this, 6647 schema=schema, 6648 path=path, 6649 error_handling=error_handling, 6650 empty_handling=empty_handling, 6651 ) 6652 6653 def _parse_match_against(self) -> exp.MatchAgainst: 6654 expressions = self._parse_csv(self._parse_column) 6655 6656 self._match_text_seq(")", "AGAINST", "(") 6657 6658 this = self._parse_string() 6659 6660 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6661 modifier = "IN NATURAL LANGUAGE MODE" 6662 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6663 modifier = f"{modifier} WITH QUERY EXPANSION" 6664 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6665 modifier = "IN BOOLEAN MODE" 6666 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6667 modifier = "WITH QUERY EXPANSION" 6668 else: 6669 modifier = None 6670 6671 return self.expression( 6672 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6673 ) 6674 6675 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6676 def _parse_open_json(self) -> exp.OpenJSON: 6677 this = self._parse_bitwise() 6678 path = self._match(TokenType.COMMA) and self._parse_string() 6679 6680 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6681 this = self._parse_field(any_token=True) 6682 kind = self._parse_types() 6683 path = 
self._parse_string() 6684 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6685 6686 return self.expression( 6687 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6688 ) 6689 6690 expressions = None 6691 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6692 self._match_l_paren() 6693 expressions = self._parse_csv(_parse_open_json_column_def) 6694 6695 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6696 6697 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6698 args = self._parse_csv(self._parse_bitwise) 6699 6700 if self._match(TokenType.IN): 6701 return self.expression( 6702 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6703 ) 6704 6705 if haystack_first: 6706 haystack = seq_get(args, 0) 6707 needle = seq_get(args, 1) 6708 else: 6709 haystack = seq_get(args, 1) 6710 needle = seq_get(args, 0) 6711 6712 return self.expression( 6713 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6714 ) 6715 6716 def _parse_predict(self) -> exp.Predict: 6717 self._match_text_seq("MODEL") 6718 this = self._parse_table() 6719 6720 self._match(TokenType.COMMA) 6721 self._match_text_seq("TABLE") 6722 6723 return self.expression( 6724 exp.Predict, 6725 this=this, 6726 expression=self._parse_table(), 6727 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6728 ) 6729 6730 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6731 args = self._parse_csv(self._parse_table) 6732 return exp.JoinHint(this=func_name.upper(), expressions=args) 6733 6734 def _parse_substring(self) -> exp.Substring: 6735 # Postgres supports the form: substring(string [from int] [for int]) 6736 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6737 6738 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6739 6740 if self._match(TokenType.FROM): 6741 args.append(self._parse_bitwise()) 6742 if self._match(TokenType.FOR): 6743 if len(args) == 1: 6744 args.append(exp.Literal.number(1)) 6745 args.append(self._parse_bitwise()) 6746 6747 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6748 6749 def _parse_trim(self) -> exp.Trim: 6750 # https://www.w3resource.com/sql/character-functions/trim.php 6751 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6752 6753 position = None 6754 collation = None 6755 expression = None 6756 6757 if self._match_texts(self.TRIM_TYPES): 6758 position = self._prev.text.upper() 6759 6760 this = self._parse_bitwise() 6761 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6762 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6763 expression = self._parse_bitwise() 6764 6765 if invert_order: 6766 this, expression = expression, this 6767 6768 if self._match(TokenType.COLLATE): 6769 collation = self._parse_bitwise() 6770 6771 return self.expression( 6772 exp.Trim, this=this, position=position, expression=expression, collation=collation 6773 ) 6774 6775 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6776 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6777 6778 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6779 return self._parse_window(self._parse_id_var(), alias=True) 6780 6781 def _parse_respect_or_ignore_nulls( 6782 self, this: t.Optional[exp.Expression] 6783 ) -> t.Optional[exp.Expression]: 6784 if self._match_text_seq("IGNORE", "NULLS"): 
6785 return self.expression(exp.IgnoreNulls, this=this) 6786 if self._match_text_seq("RESPECT", "NULLS"): 6787 return self.expression(exp.RespectNulls, this=this) 6788 return this 6789 6790 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6791 if self._match(TokenType.HAVING): 6792 self._match_texts(("MAX", "MIN")) 6793 max = self._prev.text.upper() != "MIN" 6794 return self.expression( 6795 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6796 ) 6797 6798 return this 6799 6800 def _parse_window( 6801 self, this: t.Optional[exp.Expression], alias: bool = False 6802 ) -> t.Optional[exp.Expression]: 6803 func = this 6804 comments = func.comments if isinstance(func, exp.Expression) else None 6805 6806 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6807 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6808 if self._match_text_seq("WITHIN", "GROUP"): 6809 order = self._parse_wrapped(self._parse_order) 6810 this = self.expression(exp.WithinGroup, this=this, expression=order) 6811 6812 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6813 self._match(TokenType.WHERE) 6814 this = self.expression( 6815 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6816 ) 6817 self._match_r_paren() 6818 6819 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6820 # Some dialects choose to implement it and some do not. 6821 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6822 6823 # There is some code above in _parse_lambda that handles 6824 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6825 6826 # The code below handles 6827 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6828 6829 # Oracle allows both formats 6830 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6831 # and Snowflake chose to do the same for familiarity 6832 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6833 if isinstance(this, exp.AggFunc): 6834 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6835 6836 if ignore_respect and ignore_respect is not this: 6837 ignore_respect.replace(ignore_respect.this) 6838 this = self.expression(ignore_respect.__class__, this=this) 6839 6840 this = self._parse_respect_or_ignore_nulls(this) 6841 6842 # bigquery select from window x AS (partition by ...)
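        #   e.g. (illustrative) SELECT ROW_NUMBER() OVER w FROM t
        #        WINDOW w AS (PARTITION BY a ORDER BY b)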
6843 if alias: 6844 over = None 6845 self._match(TokenType.ALIAS) 6846 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6847 return this 6848 else: 6849 over = self._prev.text.upper() 6850 6851 if comments and isinstance(func, exp.Expression): 6852 func.pop_comments() 6853 6854 if not self._match(TokenType.L_PAREN): 6855 return self.expression( 6856 exp.Window, 6857 comments=comments, 6858 this=this, 6859 alias=self._parse_id_var(False), 6860 over=over, 6861 ) 6862 6863 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6864 6865 first = self._match(TokenType.FIRST) 6866 if self._match_text_seq("LAST"): 6867 first = False 6868 6869 partition, order = self._parse_partition_and_order() 6870 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6871 6872 if kind: 6873 self._match(TokenType.BETWEEN) 6874 start = self._parse_window_spec() 6875 self._match(TokenType.AND) 6876 end = self._parse_window_spec() 6877 6878 spec = self.expression( 6879 exp.WindowSpec, 6880 kind=kind, 6881 start=start["value"], 6882 start_side=start["side"], 6883 end=end["value"], 6884 end_side=end["side"], 6885 ) 6886 else: 6887 spec = None 6888 6889 self._match_r_paren() 6890 6891 window = self.expression( 6892 exp.Window, 6893 comments=comments, 6894 this=this, 6895 partition_by=partition, 6896 order=order, 6897 spec=spec, 6898 alias=window_alias, 6899 over=over, 6900 first=first, 6901 ) 6902 6903 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6904 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6905 return self._parse_window(window, alias=alias) 6906 6907 return window 6908 6909 def _parse_partition_and_order( 6910 self, 6911 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6912 return self._parse_partition_by(), self._parse_order() 6913 6914 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6915 self._match(TokenType.BETWEEN) 6916 6917 return { 6918 "value": ( 6919 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6920 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6921 or self._parse_bitwise() 6922 ), 6923 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6924 } 6925 6926 def _parse_alias( 6927 self, this: t.Optional[exp.Expression], explicit: bool = False 6928 ) -> t.Optional[exp.Expression]: 6929 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6930 # so this section tries to parse the clause version and if it fails, it treats the token 6931 # as an identifier (alias) 6932 if self._can_parse_limit_or_offset(): 6933 return this 6934 6935 any_token = self._match(TokenType.ALIAS) 6936 comments = self._prev_comments or [] 6937 6938 if explicit and not any_token: 6939 return this 6940 6941 if self._match(TokenType.L_PAREN): 6942 aliases = self.expression( 6943 exp.Aliases, 6944 comments=comments, 6945 this=this, 6946 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6947 ) 6948 self._match_r_paren(aliases) 6949 return aliases 6950 6951 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6952 self.STRING_ALIASES and self._parse_string_as_identifier() 6953 ) 6954 6955 if alias: 6956 comments.extend(alias.pop_comments()) 6957 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6958 column = this.this 6959 6960 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6961 if not this.comments and column and 
column.comments: 6962 this.comments = column.pop_comments() 6963 6964 return this 6965 6966 def _parse_id_var( 6967 self, 6968 any_token: bool = True, 6969 tokens: t.Optional[t.Collection[TokenType]] = None, 6970 ) -> t.Optional[exp.Expression]: 6971 expression = self._parse_identifier() 6972 if not expression and ( 6973 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6974 ): 6975 quoted = self._prev.token_type == TokenType.STRING 6976 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6977 6978 return expression 6979 6980 def _parse_string(self) -> t.Optional[exp.Expression]: 6981 if self._match_set(self.STRING_PARSERS): 6982 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6983 return self._parse_placeholder() 6984 6985 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6986 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6987 6988 def _parse_number(self) -> t.Optional[exp.Expression]: 6989 if self._match_set(self.NUMERIC_PARSERS): 6990 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6991 return self._parse_placeholder() 6992 6993 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6994 if self._match(TokenType.IDENTIFIER): 6995 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6996 return self._parse_placeholder() 6997 6998 def _parse_var( 6999 self, 7000 any_token: bool = False, 7001 tokens: t.Optional[t.Collection[TokenType]] = None, 7002 upper: bool = False, 7003 ) -> t.Optional[exp.Expression]: 7004 if ( 7005 (any_token and self._advance_any()) 7006 or self._match(TokenType.VAR) 7007 or (self._match_set(tokens) if tokens else False) 7008 ): 7009 return self.expression( 7010 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7011 ) 7012 return self._parse_placeholder() 7013 7014 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7015 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7016 self._advance() 7017 return self._prev 7018 return None 7019 7020 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7021 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7022 7023 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7024 return self._parse_primary() or self._parse_var(any_token=True) 7025 7026 def _parse_null(self) -> t.Optional[exp.Expression]: 7027 if self._match_set(self.NULL_TOKENS): 7028 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7029 return self._parse_placeholder() 7030 7031 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7032 if self._match(TokenType.TRUE): 7033 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7034 if self._match(TokenType.FALSE): 7035 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7036 return self._parse_placeholder() 7037 7038 def _parse_star(self) -> t.Optional[exp.Expression]: 7039 if self._match(TokenType.STAR): 7040 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7041 return self._parse_placeholder() 7042 7043 def _parse_parameter(self) -> exp.Parameter: 7044 this = self._parse_identifier() or self._parse_primary_or_var() 7045 return self.expression(exp.Parameter, this=this) 7046 7047 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7048 if self._match_set(self.PLACEHOLDER_PARSERS): 7049 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7050 if placeholder: 7051 return placeholder 7052 self._advance(-1) 7053 return None 7054 7055 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7056 if not self._match_texts(keywords): 7057 return None 7058 if self._match(TokenType.L_PAREN, advance=False): 7059 return self._parse_wrapped_csv(self._parse_expression) 7060 7061 expression = self._parse_expression() 7062 return [expression] if expression else None 7063 7064 def _parse_csv( 7065 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7066 ) -> t.List[exp.Expression]: 7067 parse_result = parse_method() 7068 items = [parse_result] if parse_result is not None else [] 7069 7070 while self._match(sep): 7071 self._add_comments(parse_result) 7072 parse_result = parse_method() 7073 if parse_result is not None: 7074 items.append(parse_result) 7075 7076 return items 7077 7078 def _parse_tokens( 7079 self, parse_method: t.Callable, expressions: t.Dict 7080 ) -> t.Optional[exp.Expression]: 7081 this = parse_method() 7082 7083 while self._match_set(expressions): 7084 this = self.expression( 7085 expressions[self._prev.token_type], 7086 this=this, 7087 comments=self._prev_comments, 7088 expression=parse_method(), 7089 ) 7090 7091 return this 7092 7093 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7094 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7095 7096 def _parse_wrapped_csv( 7097 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7098 ) -> t.List[exp.Expression]: 7099 return self._parse_wrapped( 7100 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7101 ) 7102 7103 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7104 wrapped = self._match(TokenType.L_PAREN) 7105 if not wrapped and not optional: 7106 self.raise_error("Expecting (") 7107 parse_result = parse_method() 7108 if wrapped: 7109 self._match_r_paren() 7110 return parse_result 7111 7112 def _parse_expressions(self) -> t.List[exp.Expression]: 7113 return self._parse_csv(self._parse_expression) 7114 7115 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7116 return self._parse_select() or self._parse_set_operations( 7117 self._parse_alias(self._parse_assignment(), explicit=True) 7118 if alias 7119 else self._parse_assignment() 7120 ) 7121 7122 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7123 return self._parse_query_modifiers( 7124 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7125 ) 7126 7127 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7128 this = None 7129 if self._match_texts(self.TRANSACTION_KIND): 7130 this = self._prev.text 7131 7132 self._match_texts(("TRANSACTION", "WORK")) 7133 7134 modes = [] 7135 while True: 7136 mode = [] 7137 while self._match(TokenType.VAR): 7138 mode.append(self._prev.text) 7139 7140 if mode: 7141 modes.append(" ".join(mode)) 7142 if not self._match(TokenType.COMMA): 7143 break 7144 7145 return self.expression(exp.Transaction, this=this, modes=modes) 7146 7147 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7148 chain = None 7149 savepoint = None 7150 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7151 7152 self._match_texts(("TRANSACTION", "WORK")) 7153 7154 if self._match_text_seq("TO"): 7155 self._match_text_seq("SAVEPOINT") 7156 savepoint = self._parse_id_var() 
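        # Illustrative statements handled here (a sketch): COMMIT WORK AND NO CHAIN,
        # ROLLBACK TO SAVEPOINT sp1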

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
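The options above combine as in the following sketch; the SQL string and option values are illustrative, not defaults:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"

    # Record errors (and log them) instead of raising on the first one
    parser = Parser(error_level=ErrorLevel.WARN, error_message_context=50, max_errors=5)
    expressions = parser.parse(Tokenizer().tokenize(sql), sql)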
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
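A minimal usage sketch, assuming the base Tokenizer is appropriate for the input:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"  # two statements yield two syntax trees
    trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    assert len(trees) == 2
    print(trees[0].sql())  # SELECT 1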
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
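For example, constraining the result to a SELECT statement; this assumes exp.Select has an entry in EXPRESSION_PARSERS (other Expression types may not):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tree = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(tree, exp.Select)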
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
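A sketch of the two non-default behaviors; the misspelled FROM is deliberate:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FORM t"

    # WARN: errors are recorded, logged via the module logger, and parsing continues
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql)
    print(parser.errors)  # recorded ParseError instances

    # RAISE: errors are aggregated into a single ParseError at the end
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        print(len(e.errors))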
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
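The recorded errors carry the same pieces used to build the message, so tooling can inspect them; a sketch assuming the same misspelled input as above:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    try:
        Parser(error_level=ErrorLevel.RAISE).parse(
            Tokenizer().tokenize("SELECT * FORM t"), "SELECT * FORM t"
        )
    except ParseError as e:
        first = e.errors[0]  # dicts with description, line, col, highlight, ...
        print(first["line"], first["col"], first["highlight"])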
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
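Dialect parsers call this helper from their _parse_* methods; a hypothetical subclass sketch (MyParser and _parse_foo are illustrative, not part of sqlglot):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_foo(self):
            # self.expression attaches pending comments and validates mandatory args
            return self.expression(exp.Anonymous, this="FOO", expressions=[])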
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
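For instance, validating a node that is missing a mandatory argument; what happens then depends on the configured error level:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    incomplete = exp.Cast(this=exp.column("x"))  # "to" is mandatory for Cast

    Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)  # skipped

    strict = Parser(error_level=ErrorLevel.RAISE)
    strict.validate_expression(incomplete)  # records the error for check_errors
    print(strict.errors)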
    def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(nested=True, parse_set_operation=False)

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
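Its effect is visible through the public API; for example, in the default dialect a bare UNION parses as distinct:

    import sqlglot
    from sqlglot import exp

    tree = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    assert isinstance(tree, exp.Union)
    print(tree.args["distinct"])  # True: bare UNION defaults to DISTINCT here
    print(tree.this.sql(), "|", tree.expression.sql())  # SELECT 1 | SELECT 2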